xref: /phasta/phSolver/common/commu.f (revision 9b8a2b5301dae5602a16bed4b9abe8b4e50204bd)
1      subroutine commu (global, ilwork,  n,  code)
2c---------------------------------------------------------------------
3c
4c This subroutine is responsible for interprocessor communication of
5c the residual and solution vectors.
6c
7c input:
8c     global(nshg,n): global vector to be communicated. Note that
9c                      this vector is local to the processor, (i.e.
10c                      not distributed across processors)
11c     ilwork(nlwork):  this is the local interprocessor work array.
12c                      This array is local to the processor, (i.e.
13c                      each processor has a unique ilwork array.
14c     n:               second dimension of the array to be communicated
15c     code:            = 'in' for communicating with the residual
16c                      = 'out' for communicating the solution
17c
18c---------------------------------------------------------------------
19c
20c The array ilwork describes the details of the communications.
21c Each communication step (call of this routine) consists of a
22c sequence of "tasks", where a task is defined as a communication
23c between two processors where data is exchanged. This would imply
24c that for a given processor, there will be as many tasks as there
25c are processors with which it must communicate. Details of the
26c ilwork array appear below.
27c
28c---------------------------------------------------------------------
29c
30      include "common.h"
31      include "mpif.h"
32      include "auxmpi.h"
c
c.... stat/req: MPI status records and nonblocking request handles for
c     the operations posted below; sized 2*maxtask as an upper bound on
c     outstanding sends plus receives.  "status" appears unused here.
c
33      integer status(MPI_STATUS_SIZE), ierr
34      integer stat(MPI_STATUS_SIZE, 2*maxtask), req(2*maxtask)
c.... per-call timing deltas, used only when MPI statistics are enabled
35      real*8  rDelISend, rDelIRecv, rDelWaitAll
36
c.... rtemp holds one receive buffer per receiving task ('in' mode);
c     its contents are unpacked into global(..) only after MPI_WAITALL
37      dimension global(nshg,n),
38     &          rtemp(maxfront*n,maxtask),
39     &          ilwork(nlwork)
40
41      character*3 code
42
c.... impistat/impistat2 come from common.h and gate MPI profiling.
c     NOTE(review): impistat=1 appears to feed the flow counters and
c     impistat=2 the "*Scal" (scalar) counters -- confirm in common.h
43      if(impistat2.eq.1) call MPI_BARRIER (MPI_COMM_WORLD, ierr)
44      if(impistat.gt.0) rDelIRecv = zero
45      if(impistat.gt.0) rDelISend = zero
46      if(impistat.gt.0) rDelWaitAll = zero
47
c.... abort unless code is exactly 'in ' or 'out'
48      if (code .ne. 'in ' .and. code .ne. 'out') then
49        if(myrank.eq.0) then
50           write(*,*) 'ERROR code != ''in '' || ''out'''
51           write(*,*) 'ERROR code =''', code,''''
52        endif
53        call error ('commu   ','code    ',0)
54      endif
55
56
c.... map the vector width n onto kdof, the column of sevsegtype
c     (built in ctypes.f) holding the MPI derived datatype describing
c     segments of an (nshg,n) array
57      if     (n .eq. 1)      then        ! like a scalar
58        kdof = 1
59      elseif (n .eq. nsd)    then        ! like the normal vectors
60        kdof = 2
61      elseif (n .eq. ndof)   then        ! res, y, ac, krylov vectors....
62        kdof = 3
63      elseif (n .eq. nflow*nflow) then     ! bdiag
64        kdof = 4
65      elseif (n .eq. (nflow-1)*nsd) then  ! qres
66        kdof = 5
67      elseif (n .eq. nflow) then
68        kdof = 6
69      elseif (n .eq. 24 ) then
70        kdof = 7
71      elseif (n .eq. 9) then
72        kdof = 8
73      elseif (n .eq. 11 ) then
74        kdof = 9
75      elseif (n .eq. 7 ) then
76        kdof = 10
77      elseif (n .eq. 33 ) then
78         kdof = 11
79      elseif (n .eq. 22 ) then
80         kdof = 12
81      elseif (n .eq. 16 ) then
82         kdof = 13
83      elseif (n .eq. 10 ) then
84         kdof = 14
85      elseif (n .eq. nflow*nsd ) then   !surface tension + qres
86         kdof = 15
87      else
88        call error ('commu   ','n       ',n)
89      endif
90
91c... Note that when adding another kdof to the above set, we must
92c... also make changes in ctypes.f and auxmpi.h
93
94c---------------------------------------------------------------------
95c  ilwork(1): number of tasks
96c
97c  The following information is contained in ilwork for each task:
98c     itag: tag of the communication
99c     iacc: == 0 if task is a send
100c           == 1 if task is a receive
101c     iother: rank of processor with which this communication occurs
102c     numseg: number of data "segments" to be sent or received. A
103c             segment is defined as a continuous section of the global
104c             vector to be communicated, (i.e. a group of nodes (or,
105c             rather, "shape function coefficients") which occur
106c             sequentially in the array global(nshg,n)).
107c     isgbeg: location of the first segment in the array owned by the
108c             current processor.
109c
110c The two types of communication are 'in', where the residual is being
111c communicated, and 'out', where the solution is being communicated.
112c Note that when the type is 'out', senders receive and receivers send.
113c
114c The following comment pertains to a communication of type 'in':
115c
116c     If the task is a send, then all of the numseg segments are
117c     sent with a single call to MPI_ISEND. Where these segments live in
118c     the array is built into the array sevsegtype, which is a common
119c     array constructed in the subroutine "ctypes.f". In other words,
120c     sevsegtype is a data type that describes the indices of the blocks
121c     to be sent, in terms of their beginning index, and the length of
122c     each segment. Using this, we can make a single send to take care of
123c     all the segments for this task.
124c
125c     If the task is a receive, then once the vector is received, the
126c     received segments must be added to the correct locations in the
127c     current array. These locations are described in ilwork as the
128c     beginning position, then the length of the segment.
129c
130c---------------------------------------------------------------------
c
c.... m counts the nonblocking operations posted (exactly one per task,
c     so m indexes req); itkbeg walks the variable-length task records
c     in ilwork; idl counts receive buffers consumed ('in' mode only)
c
131      numtask = ilwork(1)
132
133      itkbeg = 1
134      m = 0
135      idl=0
136
137      DO itask = 1, numtask
138        m      = m + 1
c.... unpack this task's 5-word header from ilwork (segment begin/length
c     pairs follow the header; see the offsets used below)
139        itag   = ilwork (itkbeg + 1)
140        iacc   = ilwork (itkbeg + 2)
141        iother = ilwork (itkbeg + 3)
142        numseg = ilwork (itkbeg + 4)
143        isgbeg = ilwork (itkbeg + 5)
144c
145c.... if iacc == 0, then this task is a send.
146c     slave
147c
148        if (iacc .EQ. 0) then
149c
150c.... residual communication
151c
152          if (code .eq. 'in ') then
153            if(impistat.eq.1) then
154              iISend = iISend+1
155            elseif(impistat.eq.2) then
156               iISendScal = iISendScal+1
157            endif
158            if(impistat.gt.0) rmpitmr = TMRC()
c.... one send covers all of this task's segments via the derived
c     datatype sevsegtype(itask,kdof); count 1 is one such datatype
159            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
160     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
161            if(impistat.gt.0) rDelISend = TMRC()-rmpitmr
162            if(impistat.eq.1) then
163              rISend = rISend+rDelISend
164            elseif(impistat.eq.2) then
165              rISendScal = rISendScal+rDelISend
166            endif
167          endif
168c
169c.... solution communication
170c
171          if (code .eq. 'out') then
c.... 'out' reverses roles: the send-side (slave) task receives the
c     solution directly into its segment locations in global(..)
172            if(impistat.eq.1) then
173              iIRecv = iIRecv+1
174            elseif(impistat.eq.2) then
175               iIRecvScal = iIRecvScal+1
176            endif
177            if(impistat.gt.0) rmpitmr = TMRC()
178            call MPI_IRECV(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
179     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
180            if(impistat.gt.0) rDelIRecv = TMRC()-rmpitmr
181            if(impistat.eq.1) then
182              rIRecv = rIRecv+rDelIRecv
183            elseif(impistat.eq.2) then
184              rIRecvScal = rIRecvScal+rDelIRecv
185            endif
186          endif
187c
188c.... if iacc == 1, then this task is a receive.
189c     master
190c
191        else
192          if (code .eq. 'in ') then
193c
194c.... determine the total number of nodes involved in this
195c     communication (lfront), including all segments
196c
197            lfront = 0
198            do is = 1,numseg
199              lenseg = ilwork (itkbeg + 4 + 2*is)
200              lfront = lfront + lenseg
201            enddo
202c
203c.... receive all segments for this task in a single step
204c
205            idl=idl+1 ! stands for i Do Later, the number to fix later
206            if(impistat.eq.1) then
207              iIRecv = iIRecv+1
208            elseif(impistat.eq.2) then
209              iIRecvScal = iIRecvScal+1
210            endif
211            if(impistat.gt.0) rmpitmr = TMRC()
c.... receive into the staging buffer; contents are summed into
c     global(..) after MPI_WAITALL (see delayed assembly below)
212            call MPI_IRECV(rtemp(1,idl), lfront*n, MPI_DOUBLE_PRECISION,
213     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
214            if(impistat.gt.0) rDelIRecv = TMRC()-rmpitmr
215            if(impistat.eq.1) then
216               rIRecv = rIRecv+rDelIRecv
217            elseif(impistat.eq.2) then
218               rIRecvScal = rIRecvScal+rDelIRecv
219            endif
220          endif
221          if (code .eq. 'out') then
c.... 'out' reverses roles: the receive-side (master) task sends its
c     (now correct) solution values back to the slaves
222            if(impistat.eq.1) then
223              iISend = iISend+1
224            elseif(impistat.eq.2) then
225              iISendScal = iISendScal+1
226            endif
227            if(impistat.gt.0) rmpitmr = TMRC()
228            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
229     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
230            if(impistat.gt.0) rDelISend = TMRC()-rmpitmr
231            if(impistat.eq.1) then
232               rISend = rISend+rDelISend
233            elseif(impistat.eq.2) then
234               rISendScal = rISendScal+rDelISend
235            endif
236          endif
237        endif
238
c.... advance to the next task record: 5-word header plus numseg
c     (begin,length) pairs, minus the overlap of the first begin word
239        itkbeg = itkbeg + 4 + 2*numseg
240
241      enddo   !! end tasks loop
242
c.... wait for all m nonblocking operations posted above to complete
243      if(impistat.eq.1) then
244        iWaitAll = iWaitAll+1
245      elseif(impistat.eq.2) then
246         iWaitAllScal = iWaitAllScal+1
247      endif
248      if(impistat.gt.0) rmpitmr = TMRC()
249      call MPI_WAITALL(m, req, stat, ierr)
250      if(impistat.gt.0) rDelWaitAll = TMRC()-rmpitmr
251      if(impistat.eq.1) then
252        rWaitAll = rWaitAll+rDelWaitAll
c     NOTE(review): rDelIRecv/rDelISend are overwritten (not summed)
c     on each task above, so rCommu below includes only the LAST
c     task's send/recv deltas -- confirm whether this undercount is
c     intentional (rISend/rIRecv do accumulate every task)
253        rCommu = rCommu+rDelIRecv+rDelISend+rDelWaitAll
254      elseif(impistat.eq.2) then
255        rWaitAllScal = rWaitAllScal+rDelWaitAll
256        rCommuScal = rCommuScal+rDelIRecv+rDelISend+rDelWaitAll
257      endif
258
259c
260c     Stuff added below is a delayed assembly of that which was communicated
261c     above but due to the switch to non-blocking receives could not be
262c     assembled until after the waitall.  Only necessary for commu "in"
263c
264
265      if(code .eq. 'in ') then
266         itkbeg=1
267         jdl=0
c.... jdl retraces the receive tasks in the same order idl counted
c     them in the posting loop, so rtemp(:,jdl) is this task's buffer
268         do j=1,numtask         ! time to do all the segments that needed to be
269                                ! assembled into the global vector
270
271            iacc   = ilwork (itkbeg + 2)
272            numseg = ilwork (itkbeg + 4)
273            isgbeg = ilwork (itkbeg + 5)
274            if(iacc.eq.1) then
275               jdl=jdl+1  ! keep track of order of rtemp's
276c
277c... add the received data to the global array on the current processor.
278c    Note that this involves splitting up the chunk of received data
279c    into its correct segment locations for the current processor.
280c
c.... itemp walks rtemp(:,jdl) linearly; the unpack order (idof outer,
c     segment inner) presumably mirrors how the sender's sevsegtype
c     packs the data -- layout defined in ctypes.f
281               itemp = 1
282               do idof = 1,n
283                  do is = 1,numseg
284                 isgbeg = ilwork (itkbeg + 3 + 2*is)
285                 lenseg = ilwork (itkbeg + 4 + 2*is)
286                 isgend = isgbeg + lenseg - 1
c.... accumulate (sum) the received segment into the owned locations
287                 global(isgbeg:isgend,idof) = global(isgbeg:isgend,idof)
288     &                                + rtemp (itemp:itemp+lenseg-1,jdl)
289                 itemp = itemp + lenseg
290                  enddo
291               enddo
292            endif ! end of receive (iacc=1)
293            itkbeg = itkbeg + 4 + 2*numseg
294         enddo
295      endif  ! commu "in"
296      return
297      end
298
299
300
301