xref: /phasta/M2NFixBnd/src/commuMax.f (revision 16223cb9c3f88b34f2cb94151b5cf5ffc1aac5e2)
1      subroutine commuMax (global, ilwork,  n,  code)
2c---------------------------------------------------------------------
3c
4c This subroutine is responsible for interprocessor communication of
5c the residual and solution vectors.
6c
7c input:
8c     global(nshg,n): global vector to be communicated. Note that
9c                      this vector is local to the processor, (i.e.
10c                      not distributed across processors)
11c     ilwork(nlwork):  this is the local interprocessor work array.
12c                      This array is local to the processor, (i.e.
13c                      each processor has a unique ilwork array.
14c     n:               second dimension of the array to be communicated
15c     code:            = 'in' for communicating with the residual
16c                      = 'out' for communicating the solution
17c
18c---------------------------------------------------------------------
19c
20c The array ilwork describes the details of the communications.
21c Each communication step (call of this routine) consists of a
22c sequence of "tasks", where a task is defined as a communication
23c between two processors where data is exchanged. This would imply
24c that for a given processor, there will be as many tasks as there
25c are processors with which it must communicate. Details of the
26c ilwork array appear below.
27c
c NOTE: this is the "Max" variant of commu: for code 'in' the received
c segment values are merged into the on-processor values with max()
c instead of being accumulated (see the delayed assembly loop at the
c bottom of this routine).
c
28c---------------------------------------------------------------------
29c
c.... nshg, nsd, ndof, nflow, maxfront, maxtask, nlwork, zero, the
c     impistat flags/counters/timers, TMRC and sevsegtype all come
c     from the include files below
30      include "commonM2NFixBnd.h"
31      include "mpif.h"
32      include "auxmpiM2NFixBnd.h"
33      integer status(MPI_STATUS_SIZE), ierr
34      integer stat(MPI_STATUS_SIZE, 2*maxtask), req(2*maxtask)
35      real*8  rDelISend, rDelIRecv, rDelWaitAll
36
37      dimension global(nshg,n),
38     &          rtemp(maxfront*n,maxtask),
39     &          ilwork(nlwork)
40
41      character*3 code
42
43      if(impistat2.eq.1) call MPI_BARRIER (MPI_COMM_WORLD, ierr)
44      if(impistat.eq.1) rDelIRecv = zero
45      if(impistat.eq.1) rDelISend = zero
46      if(impistat.eq.1) rDelWaitAll = zero
47
48
49      if (code .ne. 'in ' .and. code .ne. 'out')
50     &  call error ('commuMax','code    ',0)
51
52
c.... map the second dimension n onto the index kdof of the derived
c     MPI datatype table sevsegtype (built in ctypes.f)
53      if     (n .eq. 1)      then        ! like a scalar
54        kdof = 1
55      elseif (n .eq. nsd)    then        ! like the normal vectors
56        kdof = 2
57      elseif (n .eq. ndof)   then        ! res, y, ac, krylov vectors....
58        kdof = 3
59      elseif (n .eq. nflow*nflow) then     ! bdiag
60        kdof = 4
61      elseif (n .eq. (nflow-1)*nsd) then  ! qres
62        kdof = 5
63      elseif (n .eq. nflow) then
64        kdof = 6
65      elseif (n .eq. 24 ) then
66        kdof = 7
67      elseif (n .eq. 9) then
68        kdof = 8
69      elseif (n .eq. 11 ) then
70        kdof = 9
71      elseif (n .eq. 7 ) then
72        kdof = 10
73!      elseif (n .eq. 33 ) then ! hack
74      elseif (n .eq. 13 ) then ! for error
75         kdof = 11
76!      elseif (n .eq. 22 ) then
77      elseif (n .eq. 17 ) then
78         kdof = 12
79      elseif (n .eq. 16 ) then
80         kdof = 13
81      elseif (n .eq. 10 ) then
82         kdof = 14
83      elseif (n .eq. nflow*nsd ) then   !surface tension + qres
84         kdof = 15
85      else
86        call error ('commuMax','n       ',n)
87      endif
88
89c... Note that when adding another kdof to the above set, we must
90c... also make changes in ctypes.f and auxmpi.h
91
92c---------------------------------------------------------------------
93c  ilwork(1): number of tasks
94c
95c  The following information is contained in ilwork for each task:
96c     itag: tag of the communication
97c     iacc: == 0 if task is a send
98c           == 1 if task is a receive
99c     iother: rank of processor with which this communication occurs
100c     numseg: number of data "segments" to be sent or received. A
101c             segment is defined as a continuous section of the global
102c             vector to be communicated, (i.e. a group of nodes (or,
103c             rather, "shape function coefficients") which occur
104c             sequentially in the array global(nshg,n)).
105c     isbeg:  location of the first segment in the array owned by the
106c             current processor.
107c
108c The two types of communication are 'in', where the residual is being
109c communicated, and 'out', where the solution is being communicated.
110c Note that when the type is 'out', senders receive and receivers send.
111c
112c The following comment pertains to a communication of type 'in':
113c
114c     If the task is a send, then all of the numseg segments are
115c     sent with a single call to MPI_SEND. Where these segments live in
116c     the array is built into the array sevsegtype, which is a common
117c     array constructed in the subroutine "ctypes.f". In other words,
118c     sevsegtype is a data type that describes the indices of the blocks
119c     to be sent, in terms of their beginning index, and the length of
120c     each segment. Using this, we can make a single send to take care of
121c     all the segments for this task.
122c
123c     If the task is a receive, then once the vector is received, the
124c     received segments must be merged (here: by max) into the correct
125c     locations in the current array. These locations are described in
126c     ilwork as the beginning position, then the length of the segment.
127c
128c---------------------------------------------------------------------
129      numtask = ilwork(1)
130
c.... itkbeg: offset of the current task header inside ilwork
c.... m:      number of MPI requests posted (for the final waitall)
c.... idl:    number of 'in'-receives whose assembly is deferred
131      itkbeg = 1
132      m = 0
133      idl=0
134
135      DO itask = 1, numtask
136        m      = m + 1
137        itag   = ilwork (itkbeg + 1)
138        iacc   = ilwork (itkbeg + 2)
139        iother = ilwork (itkbeg + 3)
140        numseg = ilwork (itkbeg + 4)
141        isgbeg = ilwork (itkbeg + 5)
142c
143c.... if iacc == 0, then this task is a send.
144c     slave
145c
146        if (iacc .EQ. 0) then
147c
148c.... residual communication
149c
150          if (code .eq. 'in ') then
151            if(impistat.eq.1) iISend = iISend+1
152            if(impistat.eq.1) rmpitmr = TMRC()
153            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
154     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
155            if(impistat.eq.1) rDelISend = TMRC()-rmpitmr
156            if(impistat.eq.1) rISend = rISend+rDelISend
157          endif
158c
159c.... solution communication
160c
161          if (code .eq. 'out') then
162            if(impistat.eq.1) iIRecv = iIRecv+1
163            if(impistat.eq.1) rmpitmr = TMRC()
164            call MPI_IRECV(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
165     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
166            if(impistat.eq.1) rDelIRecv = TMRC()-rmpitmr
167            if(impistat.eq.1) rIRecv = rIRecv+rDelIRecv
168          endif
169c
170c.... if iacc == 1, then this task is a receive.
171c     master
172c
173        else
174          if (code .eq. 'in ') then
175c
176c.... determine the total number of nodes involved in this
177c     communication (lfront), including all segments
178c
179            lfront = 0
180            do is = 1,numseg
181              lenseg = ilwork (itkbeg + 4 + 2*is)
182              lfront = lfront + lenseg
183            enddo
184c
185c.... receive all segments for this task in a single step
186c
187            idl=idl+1 ! stands for i Do Later, the number to fix later
188            if(impistat.eq.1) iIRecv = iIRecv+1
189            if(impistat.eq.1) rmpitmr = TMRC()
190            call MPI_IRECV(rtemp(1,idl), lfront*n, MPI_DOUBLE_PRECISION,
191     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
192            if(impistat.eq.1) rDelIRecv = TMRC()-rmpitmr
193            if(impistat.eq.1) rIRecv = rIRecv+rDelIRecv
194          endif
195          if (code .eq. 'out') then
196            if(impistat.eq.1) iISend = iISend+1
197            if(impistat.eq.1) rmpitmr = TMRC()
198            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
199     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
200            if(impistat.eq.1) rDelISend = TMRC()-rmpitmr
201            if(impistat.eq.1) rISend = rISend+rDelISend
202          endif
203        endif
204
205        itkbeg = itkbeg + 4 + 2*numseg
206
207      enddo   !! end tasks loop
208
209      if(impistat.eq.1) iWaitAll = iWaitAll+1
210      if(impistat.eq.1) rmpitmr = TMRC()
211      call MPI_WAITALL(m, req, stat, ierr)
212      if(impistat.eq.1) rDelWaitAll = TMRC()-rmpitmr
213      if(impistat.eq.1) rWaitAll = rWaitAll+rDelWaitAll
c.... NOTE(review): rDelISend/rDelIRecv are overwritten (not summed)
c     inside the task loop, so rCommu below only picks up the last
c     send/recv delta of this call -- confirm that is intended
214      if(impistat.eq.1) rCommu = rCommu+rDelIRecv+rDelISend+rDelWaitAll
215
216c
217c     Stuff added below is a delayed assembly of that which was communicated
218c     above but due to the switch to non-blocking receives could not be
219c     assembled until after the waitall.  Only necessary for commu "in"
220c
221
222      if(code .eq. 'in ') then
223         itkbeg=1
224         jdl=0
225         do j=1,numtask         ! time to do all the segments that needed to be
226                                ! assembled into the global vector
227
228            iacc   = ilwork (itkbeg + 2)
229            numseg = ilwork (itkbeg + 4)
230            isgbeg = ilwork (itkbeg + 5)
231            if(iacc.eq.1) then
232               jdl=jdl+1  ! keep track of order of rtemp's
233c
234c... merge the received data into the global array on the current
235c    processor, taking the max of the local and received values.
236c    Note that this involves splitting up the chunk of received data
237c    into its correct segment locations for the current processor.
238c
239               itemp = 1
240               do idof = 1,n
241                  do is = 1,numseg
242                 isgbeg = ilwork (itkbeg + 3 + 2*is)
243                 lenseg = ilwork (itkbeg + 4 + 2*is)
244                 isgend = isgbeg + lenseg - 1
245c                 global(isgbeg:isgend,idof) = global(isgbeg:isgend,idof)
246c     &                                + rtemp (itemp:itemp+lenseg-1,jdl)
247                 do k=isgbeg,isgend  ! break this into an explicit loop and max instead of accumulate
248                 global(k,idof) = max(global(k,idof),rtemp (itemp,jdl))
249                 itemp=itemp+1   ! advance this index one at a time instead of in lenseg jumps
250                 enddo
251c                 itemp = itemp + lenseg
252                  enddo
253               enddo
254            endif ! end of receive (iacc=1)
255            itkbeg = itkbeg + 4 + 2*numseg
256         enddo
257      endif  ! commu "in"
258      return
259      end
259
260
261
262