xref: /phasta/M2NFixBnd/src/commuMax.f (revision 595995161822a203c8467e0e4a253d7bd7d6df32)
c---------------------------------------------------------------------
      subroutine commuMax (global, ilwork,  n,  code)
c---------------------------------------------------------------------
c
c Interprocessor communication of an on-processor nodal array.
c Unlike the standard commu, received segment values are combined
c with the local values by taking the MAX rather than by summation.
c
c input:
c     global(nshg,n): array to be communicated.  Local to this
c                     processor (i.e. not distributed).
c     ilwork(nlwork): local interprocessor work array; each
c                     processor holds its own unique copy.
c     n:              second dimension of global
c     code:           = 'in '  residual-style communication
c                     = 'out'  solution-style communication
c
c---------------------------------------------------------------------
c
c ilwork layout (built in ctypes.f):
c   ilwork(1) = number of communication tasks for this processor
c   (one task per neighbouring processor).  For each task the array
c   then carries:
c     itag   : message tag
c     iacc   : 0 if the task is a send, 1 if it is a receive
c     iother : rank of the peer processor
c     numseg : number of contiguous segments of the global vector
c     followed by (start,length) pairs for every segment.
c
c For code = 'out' the roles reverse: "senders" receive and
c "receivers" send.  For code = 'in ', sends ship all segments in a
c single MPI call using the derived datatype sevsegtype, while
c receives land in rtemp and are folded into global after the
c waitall (non-blocking receives cannot be assembled earlier).
c
c---------------------------------------------------------------------
c
      include "commonM2NFixBnd.h"
      include "mpif.h"
      include "auxmpiM2NFixBnd.h"

      integer status(MPI_STATUS_SIZE), ierr
      integer stat(MPI_STATUS_SIZE, 2*maxtask), req(2*maxtask)
      real*8  rDelISend, rDelIRecv, rDelWaitAll

      dimension global(nshg,n),
     &          rtemp(maxfront*n,maxtask),
     &          ilwork(nlwork)

      character*3 code

      if (impistat2 .eq. 1) call MPI_BARRIER (MPI_COMM_WORLD, ierr)
      if (impistat .eq. 1) then
         rDelIRecv   = zero
         rDelISend   = zero
         rDelWaitAll = zero
      endif

      if (code .ne. 'in ' .and. code .ne. 'out')
     &   call error ('commu   ','code    ',0)
c
c.... select the derived-datatype family (built in ctypes.f) that
c     matches the width n of the array being communicated.  When a
c     new width is added here, ctypes.f and auxmpi.h must be updated
c     as well.
c
      if     (n .eq. 1)             then   ! like a scalar
         kdof = 1
      elseif (n .eq. nsd)           then   ! like the normal vectors
         kdof = 2
      elseif (n .eq. ndof)          then   ! res, y, ac, krylov ...
         kdof = 3
      elseif (n .eq. nflow*nflow)   then   ! bdiag
         kdof = 4
      elseif (n .eq. (nflow-1)*nsd) then   ! qres
         kdof = 5
      elseif (n .eq. nflow)         then
         kdof = 6
      elseif (n .eq. 24)            then
         kdof = 7
      elseif (n .eq. 9)             then
         kdof = 8
      elseif (n .eq. 11)            then
         kdof = 9
      elseif (n .eq. 7)             then
         kdof = 10
      elseif (n .eq. 13)            then   ! for error
         kdof = 11
      elseif (n .eq. 17)            then
         kdof = 12
      elseif (n .eq. 16)            then
         kdof = 13
      elseif (n .eq. 10)            then
         kdof = 14
      elseif (n .eq. nflow*nsd)     then   ! surface tension + qres
         kdof = 15
      else
         call error ('commuMax','n       ',n)
      endif
c
c.... post one non-blocking MPI operation per task
c
      numtask = ilwork(1)

      itkbeg = 1                ! cursor into ilwork
      m      = 0                ! number of outstanding requests
      idl    = 0                ! count of deferred receive buffers

      do itask = 1, numtask
         m      = m + 1
         itag   = ilwork (itkbeg + 1)
         iacc   = ilwork (itkbeg + 2)
         iother = ilwork (itkbeg + 3)
         numseg = ilwork (itkbeg + 4)
         isgbeg = ilwork (itkbeg + 5)
c
c.... iacc = 0: this processor is the slave side of the task
c
         if (iacc .eq. 0) then
c
c.... residual communication: send all segments in one shot using
c     the derived datatype
c
            if (code .eq. 'in ') then
               if (impistat .eq. 1) then
                  iISend  = iISend + 1
                  rmpitmr = TMRC()
               endif
               call MPI_ISEND(global(isgbeg, 1), 1,
     &                        sevsegtype(itask,kdof),
     &                        iother, itag, MPI_COMM_WORLD,
     &                        req(m), ierr)
               if (impistat .eq. 1) then
                  rDelISend = TMRC() - rmpitmr
                  rISend    = rISend + rDelISend
               endif
            endif
c
c.... solution communication: receive straight into global via the
c     derived datatype
c
            if (code .eq. 'out') then
               if (impistat .eq. 1) then
                  iIRecv  = iIRecv + 1
                  rmpitmr = TMRC()
               endif
               call MPI_IRECV(global(isgbeg, 1), 1,
     &                        sevsegtype(itask,kdof),
     &                        iother, itag, MPI_COMM_WORLD,
     &                        req(m), ierr)
               if (impistat .eq. 1) then
                  rDelIRecv = TMRC() - rmpitmr
                  rIRecv    = rIRecv + rDelIRecv
               endif
            endif
c
c.... iacc = 1: this processor is the master side of the task
c
         else
            if (code .eq. 'in ') then
c
c.... total number of nodes in this communication (all segments)
c
               lfront = 0
               do is = 1, numseg
                  lenseg = ilwork (itkbeg + 4 + 2*is)
                  lfront = lfront + lenseg
               enddo
c
c.... receive all segments for this task into a staging buffer;
c     idl ("i Do Later") counts buffers to assemble after waitall
c
               idl = idl + 1
               if (impistat .eq. 1) then
                  iIRecv  = iIRecv + 1
                  rmpitmr = TMRC()
               endif
               call MPI_IRECV(rtemp(1,idl), lfront*n,
     &                        MPI_DOUBLE_PRECISION,
     &                        iother, itag, MPI_COMM_WORLD,
     &                        req(m), ierr)
               if (impistat .eq. 1) then
                  rDelIRecv = TMRC() - rmpitmr
                  rIRecv    = rIRecv + rDelIRecv
               endif
            endif
            if (code .eq. 'out') then
               if (impistat .eq. 1) then
                  iISend  = iISend + 1
                  rmpitmr = TMRC()
               endif
               call MPI_ISEND(global(isgbeg, 1), 1,
     &                        sevsegtype(itask,kdof),
     &                        iother, itag, MPI_COMM_WORLD,
     &                        req(m), ierr)
               if (impistat .eq. 1) then
                  rDelISend = TMRC() - rmpitmr
                  rISend    = rISend + rDelISend
               endif
            endif
         endif

         itkbeg = itkbeg + 4 + 2*numseg

      enddo   !! end tasks loop
c
c.... wait for every posted request to complete
c
      if (impistat .eq. 1) then
         iWaitAll = iWaitAll + 1
         rmpitmr  = TMRC()
      endif
      call MPI_WAITALL(m, req, stat, ierr)
      if (impistat .eq. 1) then
         rDelWaitAll = TMRC() - rmpitmr
         rWaitAll    = rWaitAll + rDelWaitAll
c        NOTE(review): rDelISend/rDelIRecv hold only the deltas of
c        the LAST task of the loop above, so rCommu credits a single
c        task per call -- confirm whether per-task accumulation was
c        intended.
         rCommu = rCommu + rDelIRecv + rDelISend + rDelWaitAll
      endif
c
c.... delayed assembly, needed only for code = 'in ': fold each
c     received staging buffer back into global, segment by segment.
c     Each received value replaces the local one only when it is
c     larger (max-reconciliation instead of accumulation).
c
      if (code .eq. 'in ') then
         itkbeg = 1
         jdl    = 0             ! staging buffers consumed in order
         do j = 1, numtask
            iacc   = ilwork (itkbeg + 2)
            numseg = ilwork (itkbeg + 4)
            isgbeg = ilwork (itkbeg + 5)
            if (iacc .eq. 1) then
               jdl   = jdl + 1
               itemp = 1        ! walks rtemp one entry at a time
               do idof = 1, n
                  do is = 1, numseg
                     isgbeg = ilwork (itkbeg + 3 + 2*is)
                     lenseg = ilwork (itkbeg + 4 + 2*is)
                     isgend = isgbeg + lenseg - 1
                     do k = isgbeg, isgend
                        global(k,idof) = max(global(k,idof),
     &                                       rtemp(itemp,jdl))
                        itemp = itemp + 1
                     enddo
                  enddo
               enddo
            endif ! end of receive (iacc=1)
            itkbeg = itkbeg + 4 + 2*numseg
         enddo
      endif  ! commu "in"

      return
      end
259*59599516SKenneth E. Jansen
260*59599516SKenneth E. Jansen
261*59599516SKenneth E. Jansen
262