c xref: /phasta/phSolver/common/commu.f (revision 4d60bba28c1e1f3ca80b42756ae9dcbcd5c4bc48)
1      subroutine commu (global, ilwork,  n,  code)
2c---------------------------------------------------------------------
3c
4c This subroutine is responsible for interprocessor communication of
5c the residual and solution vectors.
6c
7c input:
8c     global(nshg,n): global vector to be communicated. Note that
9c                      this vector is local to the processor, (i.e.
10c                      not distributed across processors)
11c     ilwork(nlwork):  this is the local interprocessor work array.
12c                      This array is local to the processor, (i.e.
13c                      each processor has a unique ilwork array.
14c     n:               second dimension of the array to be communicated
15c     code:            = 'in' for communicating with the residual
16c                      = 'out' for cummunicating the solution
17c
18c---------------------------------------------------------------------
19c
20c The array ilwork describes the details of the communications.
21c Each communication step (call of this routine) consists of a
22c sequence of "tasks", where a task is defined as a communication
23c between two processors where data is exchanged. This would imply
24c that for a given processor, there will be as many tasks as there
25c are processors with which it must communicate. Details of the
26c ilwork array appear below.
27c
28c---------------------------------------------------------------------
29c
30      include "common.h"
31      include "mpif.h"
32      include "auxmpi.h"
33      integer status(MPI_STATUS_SIZE), ierr
34      integer stat(MPI_STATUS_SIZE, 2*maxtask), req(2*maxtask)
35      real*8  rDelISend, rDelIRecv, rDelWaitAll
36
37      dimension global(nshg,n),
38     &          rtemp(maxfront*n,maxtask),
39     &          ilwork(nlwork)
40
41      character*3 code
42
43      if(impistat2.eq.1) call MPI_BARRIER (MPI_COMM_WORLD, ierr)
44      if(impistat.gt.0) rDelIRecv = zero
45      if(impistat.gt.0) rDelISend = zero
46      if(impistat.gt.0) rDelWaitAll = zero
47
48      if (code .ne. 'in ' .and. code .ne. 'out')
49     &  call error ('commu   ','code    ',0)
50
51      if     (n .eq. 1)      then        ! like a scalar
52        kdof = 1
53      elseif (n .eq. nsd)    then        ! like the normal vectors
54        kdof = 2
55      elseif (n .eq. ndof)   then        ! res, y, ac, krylov vectors....
56        kdof = 3
57      elseif (n .eq. nflow*nflow) then     ! bdiag
58        kdof = 4
59      elseif (n .eq. (nflow-1)*nsd) then  ! qres
60        kdof = 5
61      elseif (n .eq. nflow) then
62        kdof = 6
63      elseif (n .eq. 24 ) then
64        kdof = 7
65      elseif (n .eq. 9) then
66        kdof = 8
67      elseif (n .eq. 11 ) then
68        kdof = 9
69      elseif (n .eq. 7 ) then
70        kdof = 10
71      elseif (n .eq. 33 ) then
72         kdof = 11
73      elseif (n .eq. 22 ) then
74         kdof = 12
75      elseif (n .eq. 16 ) then
76         kdof = 13
77      elseif (n .eq. 10 ) then
78         kdof = 14
79      elseif (n .eq. nflow*nsd ) then   !surface tension + qres
80         kdof = 15
81      else
82        call error ('commu   ','n       ',n)
83      endif
84
85c... Note that when adding another kdof to the above set, we must
86c... also make changes in ctypes.f and auxmpi.h
87
88c---------------------------------------------------------------------
89c  ilwork(1): number of tasks
90c
91c  The following information is contained in ilwork for each task:
92c     itag: tag of the communication
93c     iacc: == 0 if task is a send
94c           == 1 if task is a recieve
95c     iother: rank of processor with which this communication occurs
96c     numseg: number of data "segments" to be sent or recieved. A
97c             segment is defined as a continuous section of the global
98c             vector to be communicated, (i.e. a group of nodes (or,
99c             rather, "shape function coefficients") which occur
100c             sequentially in the array global(nshg,n)).
101c     isbeg:  location of the first segment in the array owned by the
102c             current processor.
103c
104c The two types of communication are 'in', where the residual is being
105c communicated, and 'out', where the solution is being communicated.
106c Note that when the type is 'out', senders recieve and recievers send.
107c
108c The following comment pertains to a communication of type 'in':
109c
110c     If the task is a send, then all of the numseg segments are
111c     sent with a single call to MPI_SEND. Where these segments live in
112c     the array is built into the array sevsegtype, which is a common
113c     array constructed in the subroutine "ctypes.f". In other words,
114c     sevsegtype is a data type that describes the indices of the blocks
115c     to be sent, in terms of there beginning index, and the length of
116c     each segment. Using this, we can make a single send to take care of
117c     all the segments for this task.
118c
119c     If the task is a recieve, then once the vector is recieved, the
120c     recieved segments must be added to the correct locations in the
121c     current array. These locations are described in ilwork as the
122c     beginning position, then the length of the segment.
123c
124c---------------------------------------------------------------------
125      numtask = ilwork(1)
126
127      itkbeg = 1
128      m = 0
129      idl=0
130
131      DO itask = 1, numtask
132        m      = m + 1
133        itag   = ilwork (itkbeg + 1)
134        iacc   = ilwork (itkbeg + 2)
135        iother = ilwork (itkbeg + 3)
136        numseg = ilwork (itkbeg + 4)
137        isgbeg = ilwork (itkbeg + 5)
138c
139c.... if iacc == 0, then this task is a send.
140c     slave
141c
142        if (iacc .EQ. 0) then
143c
144c.... residual communication
145c
146          if (code .eq. 'in ') then
147            if(impistat.eq.1) then
148              iISend = iISend+1
149            elseif(impistat.eq.2) then
150               iISendScal = iISendScal+1
151            endif
152            if(impistat.gt.0) rmpitmr = TMRC()
153            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
154     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
155            if(impistat.gt.0) rDelISend = TMRC()-rmpitmr
156            if(impistat.eq.1) then
157              rISend = rISend+rDelISend
158            elseif(impistat.eq.2) then
159              rISendScal = rISendScal+rDelISend
160            endif
161          endif
162c
163c.... solution communication
164c
165          if (code .eq. 'out') then
166            if(impistat.eq.1) then
167              iIRecv = iIRecv+1
168            elseif(impistat.eq.2) then
169               iIRecvScal = iIRecvScal+1
170            endif
171            if(impistat.gt.0) rmpitmr = TMRC()
172            call MPI_IRECV(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
173     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
174            if(impistat.gt.0) rDelIRecv = TMRC()-rmpitmr
175            if(impistat.eq.1) then
176              rIRecv = rIRecv+rDelIRecv
177            elseif(impistat.eq.2) then
178              rIRecvScal = rIRecvScal+rDelIRecv
179            endif
180          endif
181c
182c.... if iacc == 1, then this task is a recieve.
183c     master
184c
185        else
186          if (code .eq. 'in ') then
187c
188c.... determine the number of total number of nodes involved in this
189c     communication (lfront), including all segments
190c
191            lfront = 0
192            do is = 1,numseg
193              lenseg = ilwork (itkbeg + 4 + 2*is)
194              lfront = lfront + lenseg
195            enddo
196c
197c.... recieve all segments for this task in a single step
198c
199            idl=idl+1 ! stands for i Do Later, the number to fix later
200            if(impistat.eq.1) then
201              iIRecv = iIRecv+1
202            elseif(impistat.eq.2) then
203              iIRecvScal = iIRecvScal+1
204            endif
205            if(impistat.gt.0) rmpitmr = TMRC()
206            call MPI_IRECV(rtemp(1,idl), lfront*n, MPI_DOUBLE_PRECISION,
207     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
208            if(impistat.gt.0) rDelIRecv = TMRC()-rmpitmr
209            if(impistat.eq.1) then
210               rIRecv = rIRecv+rDelIRecv
211            elseif(impistat.eq.2) then
212               rIRecvScal = rIRecvScal+rDelIRecv
213            endif
214          endif
215          if (code .eq. 'out') then
216            if(impistat.eq.1) then
217              iISend = iISend+1
218            elseif(impistat.eq.2) then
219              iISendScal = iISendScal+1
220            endif
221            if(impistat.gt.0) rmpitmr = TMRC()
222            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
223     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
224            if(impistat.gt.0) rDelISend = TMRC()-rmpitmr
225            if(impistat.eq.1) then
226               rISend = rISend+rDelISend
227            elseif(impistat.eq.2) then
228               rISendScal = rISendScal+rDelISend
229            endif
230          endif
231        endif
232
233        itkbeg = itkbeg + 4 + 2*numseg
234
235      enddo   !! end tasks loop
236
237      if(impistat.eq.1) then
238        iWaitAll = iWaitAll+1
239      elseif(impistat.eq.2) then
240         iWaitAllScal = iWaitAllScal+1
241      endif
242      if(impistat.gt.0) rmpitmr = TMRC()
243      call MPI_WAITALL(m, req, stat, ierr)
244      if(impistat.gt.0) rDelWaitAll = TMRC()-rmpitmr
245      if(impistat.eq.1) then
246        rWaitAll = rWaitAll+rDelWaitAll
247        rCommu = rCommu+rDelIRecv+rDelISend+rDelWaitAll
248      elseif(impistat.eq.2) then
249        rWaitAllScal = rWaitAllScal+rDelWaitAll
250        rCommuScal = rCommuScal+rDelIRecv+rDelISend+rDelWaitAll
251      endif
252
253c
254c     Stuff added below is a delayed assembly of that which was communicated
255c     above but due to the switch to non-blocking receivves could not be
256c     assembled until after the waitall.  Only necessary for commu "in"
257c
258
259      if(code .eq. 'in ') then
260         itkbeg=1
261         jdl=0
262         do j=1,numtask         ! time to do all the segments that needed to be
263                                ! assembled into the global vector
264
265            iacc   = ilwork (itkbeg + 2)
266            numseg = ilwork (itkbeg + 4)
267            isgbeg = ilwork (itkbeg + 5)
268            if(iacc.eq.1) then
269               jdl=jdl+1  ! keep track of order of rtemp's
270c
271c... add the recieved data to the global array on the current processor.
272c    Note that this involves splitting up the chunk of recieved data
273c    into its correct segment locations for the current processor.
274c
275               itemp = 1
276               do idof = 1,n
277                  do is = 1,numseg
278                 isgbeg = ilwork (itkbeg + 3 + 2*is)
279                 lenseg = ilwork (itkbeg + 4 + 2*is)
280                 isgend = isgbeg + lenseg - 1
281                 global(isgbeg:isgend,idof) = global(isgbeg:isgend,idof)
282     &                                + rtemp (itemp:itemp+lenseg-1,jdl)
283                 itemp = itemp + lenseg
284                  enddo
285               enddo
286            endif ! end of receive (iacc=1)
287            itkbeg = itkbeg + 4 + 2*numseg
288         enddo
289      endif  ! commu "in"
290      return
291      end
292
293
294
295