xref: /phasta/phSolver/AMG/ramg_paratools.f (revision 595995161822a203c8467e0e4a253d7bd7d6df32) !
1
2!**************************************************************
!      ramg_global_lhs (global PPE assembly)
!       Make lhsP globally complete across partition boundaries
5!**************************************************************
6
!----------------------------------------------------------------------
!  ramg_global_lhs
!  Assemble a "globally complete" copy of the partition-local sparse
!  lhsP matrix: for every shape-function node shared with a neighbour
!  processor, the matching sub-matrix entries are exchanged (MPI
!  ISEND/IRECV) and summed in, so each on-processor row of the output
!  (lhsGPcolm/lhsGProwp/lhsGP) holds the fully assembled values.
!
!  In:  acolm/arowp/alhsP  local CSR-like matrix (redun values/entry)
!       annz_tot           number of nonzeros in the local matrix
!       redun              number of values per nonzero (1 for PPE)
!       ilwork             PHASTA communication task list (see commu.f)
!       BC,iBC,iper        boundary-condition arrays (passed through)
!  Out: lhsGPcolm,lhsGProwp,lhsGP  assembled matrix, allocated here
!       (caller owns and must deallocate these)
!----------------------------------------------------------------------
      subroutine ramg_global_lhs(
     &               acolm,arowp,alhsP,annz_tot,
     &               lhsGPcolm,lhsGProwp,lhsGP,
     &               redun,
     &               ilwork,BC,iBC,iper)
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(in)            :: annz_tot
      integer,intent(in),dimension(nshg+1)            :: acolm
      integer,intent(in),dimension(annz_tot)           :: arowp
      integer,intent(in)                              :: redun
      real(kind=8),intent(in),dimension(redun,annz_tot)    :: alhsP
!      real(kind=8),dimension(:,:),allocatable          :: lhsGP
!      integer,dimension(:),allocatable                 :: lhsGProwp
!      integer,dimension(:),allocatable                 :: lhsGPcolm
      type(r2d),intent(inout) :: lhsGP
      type(i1d),intent(inout) :: lhsGProwp,lhsGPcolm
      integer,intent(in),dimension(nlwork)            :: ilwork
      integer,intent(in),dimension(nshg)              :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC)  :: BC

      integer           :: numtask,i,j,k,m,p,ki,kj,krowp,ierr
      integer           :: gi,gj,gk
      integer           :: itag,iacc,iother,numseg,isgbeg,itkbeg
      ! NOTE(review): req/stat are sized redun*numpe, but the second
      ! comm phase posts 4 requests per task -- assumes
      ! 4*numtask <= redun*numpe; confirm for redun=1 cases.
      integer    :: stat(MPI_STATUS_SIZE,redun*numpe),req(redun*numpe)
      integer           :: mcheck

      real(kind=8) :: swaptemp(redun)

      character fname*10

      ! temp variables, used for communication

      ! each task has a set of storage rooms for the submatrix
      ! to communicate

      ! dense scratch for one matrix row during the merge phase
      integer,dimension(nshg) :: tmp_rowmap,tmp_revrmap
      real(kind=8),dimension(redun,nshg) :: tmp_rmtx

      ! Pflag(i)=1 marks rows rebuilt with off-processor contributions;
      ! Pcolm/Prowp/Pmtx hold those rebuilt rows (per-row allocation)
      integer,dimension(nshg) :: Pflag
      integer,dimension(nshg+1) :: Pcolm
      type(i2dd) :: Prowp
      type(r12dd) :: Pmtx
      integer :: rownnz


      ! Serial run: the local matrix is already globally complete,
      ! just deep-copy it into the output containers.
      if (numpe .le. 1) then
          allocate(lhsGP%p(redun,annz_tot),stat=ierr)
          allocate(lhsGProwp%p(annz_tot))
          allocate(lhsGPcolm%p(nshg+1))
          lhsGP%p(:,:) = alhsP(:,:)
          lhsGProwp%p(:) = arowp(:)
          lhsGPcolm%p(:) = acolm(:)
          return
      end if

      numtask = ilwork(1)
      m = 0
      itkbeg = 1

      ! NOTE(review): revmap is not declared in this subroutine --
      ! presumably a module array from ramg_data; confirm.
      revmap = 0

      ! Per-task workspace (module containers from ramg_data):
      ! *_map/_revmap  segment <-> local node index maps
      ! *_colm/_rowp/_mtx    outgoing submatrix, *2 = incoming
      allocate(sub_map%pp(numtask))
      allocate(sub_revmap%pp(numtask))
      allocate(sub_colm%pp(numtask))
      allocate(sub_colm2%pp(numtask))
      allocate(sub_rowp%pp(numtask))
      allocate(sub_rowp2%pp(numtask))
      allocate(sub_rowpmap%pp(numtask))
      allocate(sub_mtx%pp(numtask))
      allocate(sub_mtx2%pp(numtask))
      allocate(sub_nnz%p(numtask))
      allocate(sub_nnz2%p(numtask))
      allocate(sub_nshg%p(numtask))

      ! ---- Phase 1: build and exchange submatrix colm arrays -------
      ! submatrix : colm
      do i=1,numtask

         ! Beginning of each task
         m = m+1
         ! Task head information, ref commu.f
         itag = ilwork(itkbeg+1)
         iacc = ilwork(itkbeg+2)
         iother = ilwork(itkbeg+3)
         numseg = ilwork(itkbeg+4)
         isgbeg = ilwork(itkbeg+5)

         sub_nshg%p(i) = numseg

         allocate(sub_map%pp(i)%p(numseg))
         allocate(sub_revmap%pp(i)%p(nshg))

         sub_map%pp(i)%p = 0
         sub_revmap%pp(i)%p = 0

         ! prepare vector mapping
         ! sub_map: segment -> local node; sub_revmap: inverse (0 if
         ! the node is not in this task's shared set)
         do j=1,numseg
            k = ilwork(itkbeg+3+2*j) ! row idx
            sub_map%pp(i)%p(j) = k
            sub_revmap%pp(i)%p(k) = j
!            if ((myrank.eq.1).and.(iother.eq.3)) then
!            write(*,*)'mcheck:',myrank,iother,j,k,ncorp_map(k)
!            endif
         enddo

         allocate(sub_colm%pp(i)%p(numseg+1))
         allocate(sub_colm2%pp(i)%p(numseg+1))

         ! prepare matrix mapping, sub matrix colm
         ! count, per shared row, only the entries whose column is
         ! also a shared node of this task
         sub_nnz%p(i) = 0
         do j=1,numseg
            sub_colm%pp(i)%p(j) = sub_nnz%p(i) + 1
            ki = sub_map%pp(i)%p(j)
            do kj = acolm(ki),acolm(ki+1)-1
               krowp = arowp(kj)
               if (sub_revmap%pp(i)%p(krowp).ne.0) then
!      if ((ncorp_map(ki).eq.964)) then
!      if ((ncorp_map(krowp).eq.884)) then
!          write(*,*)'964/884 prepared in',myrank,i,iother
!      else
!          write(*,*)'line 964:',myrank,i,iother,ncorp_map(krowp)
!      endif
!      endif
                   sub_nnz%p(i) = sub_nnz%p(i) + 1
               end if
            enddo
            !write(*,*)'mcheck: ',myrank,j,sub_nnz
         enddo
         sub_colm%pp(i)%p(numseg+1) = sub_nnz%p(i) + 1

         ! exchange colm: sends/receives are posted in opposite order
         ! on the two sides so the itag pairs match up
         if (iacc.eq.0) then ! this task is a send, master
            call MPI_ISEND(sub_colm%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_IRECV(sub_colm2%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m+1),ierr)

         else if (iacc.eq.1) then ! this task is a receive, slave
            call MPI_IRECV(sub_colm2%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_ISEND(sub_colm%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m+1),ierr)

         end if

         m = m + 1

         ! Task end, next task
         itkbeg = itkbeg + 4 + 2*numseg

      enddo

      ! m = 2 requests per task at this point
      call MPI_WAITALL(m,req,stat,ierr)

      ! incoming nnz count is the last colm entry minus one
      do i=1,numtask
         sub_nnz2%p(i) = sub_colm2%pp(i)%p(sub_nshg%p(i)+1)-1
      enddo

      ! ---- Phase 2: build and exchange rowp indices and values -----
      ! sub matrix : rowp,mtx
      m = 0
      itkbeg = 1
      do i=1,numtask

         ! Beginning of each task
         m = m+1
         ! Task head information, ref commu.f
         itag = ilwork(itkbeg+1)
         iacc = ilwork(itkbeg+2)
         iother = ilwork(itkbeg+3)
         numseg = ilwork(itkbeg+4)
         isgbeg = ilwork(itkbeg+5)

         allocate(sub_rowp%pp(i)%p(sub_nnz%p(i)))
         allocate(sub_rowp2%pp(i)%p(sub_nnz2%p(i)))
         allocate(sub_rowpmap%pp(i)%p(sub_nnz%p(i)))
         allocate(sub_mtx%pp(i)%p(redun,sub_nnz%p(i)))
         allocate(sub_mtx2%pp(i)%p(redun,sub_nnz2%p(i)))

         ! prepare matrix mapping, sub matrix rowp
         ! same traversal as phase 1; rowp entries are stored in the
         ! task-local (segment) numbering so both sides agree
         k = 0
         do j=1,numseg
            ki = sub_map%pp(i)%p(j)
            do kj = acolm(ki),acolm(ki+1)-1
               krowp = arowp(kj)
               if (sub_revmap%pp(i)%p(krowp).ne.0) then
                   k = k + 1
                   sub_rowp%pp(i)%p(k) = sub_revmap%pp(i)%p(krowp)
                   sub_rowpmap%pp(i)%p(k) = kj
                   do p=1,redun
                      sub_mtx%pp(i)%p(p,k) = alhsP(p,kj)
                   enddo
               end if
            enddo
         enddo

         ! exchange rowp (integer) ...
         if (iacc.eq.0) then ! this task is a send, master
        call MPI_ISEND(sub_rowp%pp(i)%p(1),sub_nnz%p(i),MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m),ierr)
       call MPI_IRECV(sub_rowp2%pp(i)%p(1),sub_nnz2%p(i),MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m+1),ierr)

         else if (iacc.eq.1) then ! this task is a receive, slave
       call MPI_IRECV(sub_rowp2%pp(i)%p(1),sub_nnz2%p(i),MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m),ierr)
        call MPI_ISEND(sub_rowp%pp(i)%p(1),sub_nnz%p(i),MPI_INTEGER,
     &            iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         end if
         m = m + 2
         ! ... then the values (double precision, redun per entry)
         if (iacc.eq.0) then ! this task is a send, master
        call MPI_ISEND(sub_mtx%pp(i)%p(1,1),redun*sub_nnz%p(i),
     &            MPI_DOUBLE_PRECISION,
     &            iother,itag,MPI_COMM_WORLD,req(m),ierr)
       call MPI_IRECV(sub_mtx2%pp(i)%p(1,1),redun*sub_nnz2%p(i),
     &            MPI_DOUBLE_PRECISION,
     &            iother,itag,MPI_COMM_WORLD,req(m+1),ierr)

         else if (iacc.eq.1) then ! this task is a receive, slave
        call MPI_IRECV(sub_mtx2%pp(i)%p(1,1),redun*sub_nnz2%p(i),
     &            MPI_DOUBLE_PRECISION,
     &            iother,itag,MPI_COMM_WORLD,req(m),ierr)
        call MPI_ISEND(sub_mtx%pp(i)%p(1,1),redun*sub_nnz%p(i),
     &            MPI_DOUBLE_PRECISION,
     &            iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         end if

         m = m + 1

         ! Task end, next task
         itkbeg = itkbeg + 4 + 2*numseg

      enddo

      ! m = 4 requests per task at this point
      call MPI_WAITALL(m,req,stat,ierr)

      ! dead debug block, compiled out
      if (.false.) then
      ! dump some matrices for matlab
      do i=1,numtask
       write(fname,'((A4)(I1)(A5))')'subP',i,'     '
       !call ramg_dump_i(ncorp_map,nshg,1,'pam_corpmd')
!       call ramg_dump_matlab_map(sub_colm%pp(i)%p,sub_rowp%pp(i)%p,
!     &      sub_mtx%pp(i)%p,sub_nshg%p(i),sub_nnz%p(i),4,
!     &      sub_map%pp(i)%p,fname)
!      enddo
!      do i=1,numtask
!       write(fname,'((A5)(I1)(A4))')'subPs',i,'    '
!       !call ramg_dump_i(ncorp_map,nshg,1,'pam_corpmd')
!       call ramg_dump_matlab_map(sub_colm2%pp(i)%p,sub_rowp2%pp(i)%p,
!     &      sub_mtx2%pp(i)%p,sub_nshg%p(i),sub_nnz2%p(i),4,
!     &      sub_map%pp(i)%p,fname)
      enddo
      endif ! dump? no dump?

!      call ramg_dump_matlab_map(acolm,arowp,alhsP,nshg,annz_tot,redun,
!     &     'locallhsPb')

      ! ---- Phase 3: merge received entries into the local rows -----
      ! merge with extra entries

      Pflag = 0
      allocate(Prowp%pp(nshg))
      allocate(Pmtx%pp(nshg))
      Pcolm = 0

      do i=1,numtask ! each task
         do j=1,sub_nshg%p(i) ! each line

            tmp_rowmap = 0
            tmp_revrmap = 0
            tmp_rmtx = 0
            rownnz = 0

            ! Prepare
            ! Expand the current state of row ki into the dense
            ! scratch: first time from the original matrix, later
            ! times from the already-merged row (which is then freed)
            ki = sub_map%pp(i)%p(j)
            if (Pflag(ki).eq.0) then ! a new line
                Pflag(ki) = 1
                do k=acolm(ki),acolm(ki+1)-1
                   kj = arowp(k)
                   tmp_rowmap(kj) = k-acolm(ki)+1
                   tmp_revrmap(k-acolm(ki)+1)=kj
                   tmp_rmtx(:,kj) = alhsP(:,k)
                enddo
                rownnz = rownnz+acolm(ki+1)-acolm(ki)
            else ! existing line
                ! expand sparse line to full line
                do k=1,Pcolm(ki)
                   kj = Prowp%pp(ki)%p(k)
                   tmp_rowmap(kj) = k
                   tmp_revrmap(k) = kj
                   tmp_rmtx(:,kj) = Pmtx%pp(ki)%p(:,k)
                enddo
                rownnz = rownnz+Pcolm(ki)
                deallocate(Prowp%pp(ki)%p)
                deallocate(Pmtx%pp(ki)%p)
            endif

            ! Merge
            ! add the neighbour's contributions; new columns extend
            ! the row, existing columns are accumulated
            do ki = sub_colm2%pp(i)%p(j),sub_colm2%pp(i)%p(j+1)-1
            ! each entry in second mtx /slave
               krowp = sub_rowp2%pp(i)%p(ki)
               kj = sub_map%pp(i)%p(krowp)
               if (tmp_rowmap(kj).eq.0) then ! not yet occupied
               rownnz = rownnz+1
               tmp_rowmap(kj) = rownnz
               tmp_revrmap(rownnz) = kj
               endif
!         if ((ncorp_map(sub_map%pp(i)%p(j)).eq.964)) then
!         if ((ncorp_map(kj).eq.883)) then
!             write(*,*)'paramcheck',myrank,i,sub_mtx2%pp(i)%p(1,ki)
!         else
!             write(*,*)'paramcheck2',myrank,ncorp_map(kj)
!         endif
!         endif
              do p=1,redun
               tmp_rmtx(p,kj) = tmp_rmtx(p,kj)+sub_mtx2%pp(i)%p(p,ki)
               enddo
            enddo

            ! Store
            ! compress the dense scratch back into a sparse row
            ! (note: ki is reused here as the row index again)
            ki = sub_map%pp(i)%p(j)
            Pcolm(ki) = rownnz
            allocate(Prowp%pp(ki)%p(rownnz))
            allocate(Pmtx%pp(ki)%p(redun,rownnz))
            do k=1,rownnz
               kj = tmp_revrmap(k)
               Prowp%pp(ki)%p(k) = kj
               Pmtx%pp(ki)%p(:,k) = tmp_rmtx(:,kj)
!               if ((ncorp_map(ki).eq.964)) then
!                   write(*,*)'paramcheck',myrank,i,k,kj,ncorp_map(kj)
!               endif
!         if ((ncorp_map(ki).eq.964).and.(ncorp_map(kj).eq.883)) then
!             write(*,*)'paramcheck',myrank,i,tmp_rmtx(1,kj)
!         endif
            enddo

            !sort
            ! O(rownnz^2) exchange sort by column index; rows are
            ! short (boundary connectivity) so this is acceptable
            do gi=1,rownnz
            do gj=gi+1,rownnz
               if (Prowp%pp(ki)%p(gi).gt.Prowp%pp(ki)%p(gj)) then
                   gk = Prowp%pp(ki)%p(gj)
                   Prowp%pp(ki)%p(gj) = Prowp%pp(ki)%p(gi)
                   Prowp%pp(ki)%p(gi) = gk
                   swaptemp(:) = Pmtx%pp(ki)%p(:,gj)
                   Pmtx%pp(ki)%p(:,gj) = Pmtx%pp(ki)%p(:,gi)
                   Pmtx%pp(ki)%p(:,gi) = swaptemp(:)
               endif
            enddo
            enddo

         enddo
      enddo

      ! ---- Phase 4: assemble the final CSR output ------------------
      allocate(lhsGPcolm%p(nshg+1))

      rownnz = 0
      k = 0
      lhsGPcolm%p(1) = 1
      do i=1,nshg
      ! colm
         if (Pflag(i).eq.0) then ! original lhsP
             rownnz = rownnz+acolm(i+1)-acolm(i)
         else ! new lhsP
             rownnz = rownnz+Pcolm(i)
             k = k+1
         endif
         lhsGPcolm%p(i+1)=rownnz+1
      enddo

!      call ramg_dump_i(lhsGPcolm%p,nshg,1,'lhsGPcolm ')

      allocate(lhsGProwp%p(rownnz))
      allocate(lhsGP%p(redun,rownnz))

      ! copy rows: untouched rows from the original matrix, merged
      ! rows from Prowp/Pmtx
      rownnz = 1
      do i=1,nshg
      ! rowp & mtx.
         if (Pflag(i).eq.0) then ! original lhsP
             do j=acolm(i),acolm(i+1)-1
                lhsGProwp%p(rownnz) = arowp(j)
                lhsGP%p(:,rownnz) = alhsP(:,j)
                rownnz = rownnz + 1
             enddo
         else ! new lhsP
             do j=1,Pcolm(i)
                lhsGProwp%p(rownnz) = Prowp%pp(i)%p(j)
                lhsGP%p(:,rownnz) = Pmtx%pp(i)%p(:,j)
                rownnz = rownnz + 1
             enddo
         endif
      enddo

      ! First entry be diagonal for PPE
      ! (swap the diagonal entry into the first slot of each row)
      if (redun.eq.1) then
      loop_i: do i=1,nshg
         gi = lhsGPcolm%p(i)
         if (lhsGProwp%p(gi).ne.i) then
         do j=gi+1,lhsGPcolm%p(i+1)-1
            k = lhsGProwp%p(j)
            if (k.eq.i) then !swap first and k(diag)
!                 gj = lhsGProwp%p(gi)
                lhsGProwp%p(j) = lhsGProwp%p(gi)
                lhsGProwp%p(gi) = i

                swaptemp(:) = lhsGP%p(:,gi)
                lhsGP%p(:,gi) = lhsGP%p(:,j)
                lhsGP%p(:,j) = swaptemp(:)
                cycle loop_i
            endif
         enddo
         endif
      enddo loop_i
      endif

      ! check loop is currently a no-op: the zeroing statement is
      ! commented out
      if (redun.eq.1) then ! check PPE
      do i=1,nshg
         do j=lhsGPcolm%p(i),lhsGPcolm%p(i+1)-1
            k = lhsGProwp%p(j)
            gi = amg_paramap(1)%p(i)
            gk = amg_paramap(1)%p(k)
            if ((gi.eq.gk).and.(gi.lt.0) ) then
            !lhsGP%p(:,j) = 0
            endif
         enddo
      enddo
      endif

!      call ramg_dump_matlab_map(lhsGPcolm%p,lhsGProwp%p,lhsGP%p,
!     &      nshg,rownnz-1,redun,'locallhsP ')

!      write(*,*)'mcheck',myrank,nnz_tot,rownnz,k

!      write(*,*)'mcheck,',myrank,'okay here'

      ! Deallocate temporary arrays
      do i=1,nshg
         if (Pflag(i) .eq. 1) then
             deallocate(Prowp%pp(i)%p)
             deallocate(Pmtx%pp(i)%p)
         endif
      enddo
!          write(*,*)'mcheck deallocate,',myrank

      deallocate(Prowp%pp)
      deallocate(Pmtx%pp)

        do i=1,numtask

           deallocate(sub_map%pp(i)%p)
           deallocate(sub_revmap%pp(i)%p)
           deallocate(sub_colm%pp(i)%p)
           deallocate(sub_colm2%pp(i)%p)
           deallocate(sub_rowp%pp(i)%p)
           deallocate(sub_rowp2%pp(i)%p)
           deallocate(sub_rowpmap%pp(i)%p)
           deallocate(sub_mtx%pp(i)%p)
           deallocate(sub_mtx2%pp(i)%p)

         enddo

        deallocate(sub_map%pp)
        deallocate(sub_revmap%pp)
        deallocate(sub_rowpmap%pp)
        deallocate(sub_nnz%p)
        deallocate(sub_nnz2%p)
        deallocate(sub_nshg%p)
        deallocate(sub_colm%pp)
        deallocate(sub_colm2%pp)
        deallocate(sub_rowp%pp)
        deallocate(sub_rowp2%pp)
        deallocate(sub_mtx%pp)
        deallocate(sub_mtx2%pp)

      end subroutine ! ramg_global_lhs
480
481!*******************************************************************
482!      ramg_PPEAp
483!      produce parallel A-p product for PPE correctly
484!      q = PPE * p without scaling
485!*******************************************************************
486      subroutine ramg_PPEAp(q,p,
487     &                      colm,rowp,lhsK,lhsP,
488     &                      ilwork,BC,iBC,iper)
489      use ramg_data
490      include "common.h"
491
492
493      real(kind=8),intent(in),dimension(nshg)         :: p
494      real(kind=8),intent(inout),dimension(nshg)      :: q
495      integer,intent(in),dimension(nshg+1)            :: colm
496      integer,intent(in),dimension(nnz_tot)           :: rowp
497      real(kind=8),intent(in),dimension(9,nnz_tot)    :: lhsK
498      real(kind=8),intent(in),dimension(4,nnz_tot)    :: lhsP
499      integer,intent(in),dimension(nlwork)            :: ilwork
500      integer,intent(in),dimension(nshg)              :: iBC,iper
501      real(kind=8),intent(in),dimension(nshg,ndofBC)  :: BC
502
503      real(kind=8),dimension(nshg,1) :: t1,t1a
504      real(kind=8),dimension(nshg,3) :: t2,t2a
505      real(kind=8),dimension(nshg,4) :: t2b
506      real(kind=8),dimension(nshg,4) :: diag
507
508      diag = ramg_flowDiag%p
509
510      ! scaling
511      call fMtxVdimVecMult(p,amg_ppeDiag(1)%p,t1a,1,1,1,1,nshg)
512      call fMtxVdimVecMult(t1a,diag(:,4),t1,1,1,1,1,nshg)
513      !t1(:,1) = p
514      ! G
515      call commOut(t1,ilwork,1,iper,iBC,BC)
516      call fLesSparseApG(colm,rowp,lhsP,t1,t2,nshg,nnz_tot)
517      call commIn(t2,ilwork,3,iper,iBC,BC)
518      ! K^{-1}
519      call fMtxVdimVecMult(t2,diag,t2a,3,4,3,3,nshg)
520      call fMtxVdimVecMult(t2a,diag,t2,3,4,3,3,nshg)
521         ! note: different from lestools.c (3,4,4,3)
522         ! t1 should be good to use by now
523      ! G^T ... + C
524      t2b(:,1:3) = -t2(:,1:3)
525      t2b(:,4) = t1(:,1)
526      call commOut(t2b,ilwork,4,iper,iBC,BC)
527      call fLesSparseApNGtC(colm,rowp,lhsP,t2b,t1,nshg,nnz_tot)
528      call commIn(t1,ilwork,1,iper,iBC,BC)
529      !q = t1(:,1)
530      ! scaling again
531      call fMtxVdimVecMult(t1,diag(:,4),t1a,1,1,1,1,nshg)
532      call fMtxVdimVecMult(t1a,amg_ppeDiag(1)%p,q,1,1,1,1,nshg)
533
534      end subroutine
535
536
537!*******************************************************************
538!      ramg_PPEAps
539!      produce parallel A-p product for PPE correctly
540!      q = PPE * p, WITH SCALING!
541!*******************************************************************
542      subroutine ramg_PPEAps(q,p,diag,
543     &                      colm,rowp,lhsK,lhsP,
544     &                      ilwork,BC,iBC,iper)
545      use ramg_data
546      include "common.h"
547
548
549      real(kind=8),intent(in),dimension(nshg)         :: p
550      real(kind=8),intent(inout),dimension(nshg)      :: q
551      real(kind=8),intent(in),dimension(nshg,4)            :: diag
552      integer,intent(in),dimension(nshg+1)            :: colm
553      integer,intent(in),dimension(nnz_tot)           :: rowp
554      real(kind=8),intent(in),dimension(9,nnz_tot)    :: lhsK
555      real(kind=8),intent(in),dimension(4,nnz_tot)    :: lhsP
556      integer,intent(in),dimension(nlwork)            :: ilwork
557      integer,intent(in),dimension(nshg)              :: iBC,iper
558      real(kind=8),intent(in),dimension(nshg,ndofBC)  :: BC
559
560      real(kind=8),dimension(nshg,1) :: t1,t1a
561      real(kind=8),dimension(nshg,3) :: t2,t2a
562      real(kind=8),dimension(nshg,4) :: t2b
563
564      ! scaling
565      call fMtxVdimVecMult(p,amg_ppeDiag(1)%p,t1a,1,1,1,1,nshg)
566      call fMtxVdimVecMult(t1a,diag(:,4),t1,1,1,1,1,nshg)
567      !call ramg_dump(t1,nshg,'ppe_t1_a  ')
568      ! G
569      call commOut(t1,ilwork,1,iper,iBC,BC)
570      call fLesSparseApG(colm,rowp,lhsP,t1,t2,nshg,nnz_tot)
571      call commIn(t2,ilwork,3,iper,iBC,BC)
572      ! K^{-1}
573      call fMtxVdimVecMult(t2,diag,t2a,3,4,3,3,nshg)
574      call fMtxVdimVecMult(t2a,diag,t2,3,4,3,3,nshg)
575         ! note: different from lestools.c (3,4,4,3)
576         ! t1 should be good to use by now
577      ! G^T ... + C
578      t2b(:,1:3) = -t2(:,1:3)
579      t2b(:,4) = t1(:,1)
580      call commOut(t2b,ilwork,4,iper,iBC,BC)
581      call fLesSparseApNGtC(colm,rowp,lhsP,t2b,t1,nshg,nnz_tot)
582      call commIn(t1,ilwork,1,iper,iBC,BC)
583      ! scaling again
584      call fMtxVdimVecMult(t1,diag(:,4),t1a,1,1,1,1,nshg)
585      call fMtxVdimVecMult(t1a,amg_ppeDiag(1)%p,q,1,1,1,1,nshg)
586
587      end subroutine
588
589!*******************************************************************
590!      ramg_PPErhs
591!      produce a globally correct rhs
592!*******************************************************************
593      subroutine ramg_PPErhs(rhsp,rhsg,diag,
594     &                      colm,rowp,lhsK,lhsP,
595     &                      ilwork,BC,iBC,iper)
596      use ramg_data
597      include "common.h"
598
599
600      real(kind=8),intent(inout),dimension(nshg)         :: rhsp
601      real(kind=8),intent(in),dimension(nshg,4)      :: rhsg
602      real(kind=8),intent(in),dimension(nshg,4)            :: diag
603      integer,intent(in),dimension(nshg+1)            :: colm
604      integer,intent(in),dimension(nnz_tot)           :: rowp
605      real(kind=8),intent(in),dimension(9,nnz_tot)    :: lhsK
606      real(kind=8),intent(in),dimension(4,nnz_tot)    :: lhsP
607      integer,intent(in),dimension(nlwork)            :: ilwork
608      integer,intent(in),dimension(nshg)              :: iBC,iper
609      real(kind=8),intent(in),dimension(nshg,ndofBC)  :: BC
610
611      real(kind=8),dimension(nshg,1) :: t1
612      real(kind=8),dimension(nshg,3) :: t2,t2a
613
614      ! scaling
615      ! call fMtxVdimVecMult(rhsg,diag(:,4),t1,1,4,1,1,nshg)
616      ! call ramg_dump(t1,nshg,'ppe_t1_a  ')
617      ! G
618      t2 = rhsg(:,1:3)
619      t1 = rhsg(:,4:4)
620      call commIn(t1,ilwork,1,iper,iBC,BC)
621      call commOut(t1,ilwork,1,iper,iBC,BC)
622      call commIn(t2,ilwork,3,iper,iBC,BC)
623      call commOut(t2,ilwork,3,iper,iBC,BC)
624      ! K^{-1}
625      call fMtxVdimVecMult(t2,diag,t2a,3,4,3,3,nshg)
626      call fMtxVdimVecMult(t2a,diag,t2,3,4,3,3,nshg)
627      call commOut(t2,ilwork,3,iper,iBC,BC)
628      call fLesSparseApNGt(colm,rowp,lhsP,t2,rhsp,nshg,nnz_tot)
629      call commIn(rhsp,ilwork,1,iper,iBC,BC)
630      call commOut(rhsp,ilwork,1,iper,iBC,BC)
631      call ramg_zeroOut(rhsp,ilwork,BC,iBC,iper)
632      ! -G^T ... + Rc
633      rhsp = rhsp - t1(:,1)
634      ! scaling again
635      call fMtxVdimVecMult(rhsp,diag(:,4),t1,1,1,1,1,nshg)
636      rhsp = t1(:,1)
637
638      end subroutine ! ramg_PPErhs
639
640!********************************************************
641!     ramg_init_ilwork(ilwork,BC,iBC,iper)
642!     Initialize amg_ilwork(level)%p(:) array
643!     For each level, same structure as ilwork
644!
645!********************************************************
646      subroutine ramg_init_ilwork(ilwork,BC,iBC,iper)
647      use ramg_data
648      include "common.h"
649      include "mpif.h"
650      include "auxmpi.h"
651      integer, intent(in), dimension(nlwork)           :: ilwork
652      integer, intent(in),dimension(nshg)              :: iBC,iper
653      real(kind=8),intent(in),dimension(nshg,ndofBC)   :: BC
654
655      integer :: lvl,i,j,iacc,iother,numseg,numtask,itkbeg,sigi,k
656      integer :: tasknnztot,itask,amgseg
657      integer,dimension(numpe) :: revp
658      integer,dimension(ilwork(1)) :: tasknnz,iotherl
659      type(i1d),dimension(ilwork(1)) :: taskfill
660
661      integer,dimension(nshg) :: lvlmap
662
663      character :: fname*80
664
665!      call ramg_dump_i(amg_cfmap,nshg,1,'amgcfmap  ')
666      !write(*,*)'mcheck ilwork,',myrank,amg_nshg
667
668      if (numpe.le.1) return
669
670      if (ramg_setup_flag.ne.0) return
671
672      numtask = ilwork(1)
673
674      do lvl = 1,ramg_levelx
675!         lvl = 1 ! test for 1 level only
676
677         ! Create mapping from base to coarse lvl
678         ! lvlmap(baselevelindex) = coarselevelindex
679         lvlmap=0
680         k = 0
681         do i=1,nshg
682            if (amg_cfmap(i).ge.lvl) then
683                k = k+1
684                lvlmap(i) = k
685            endif
686         enddo
687
688         ! Count nnz for each task
689         itkbeg=1
690         tasknnz = 0
691         do i=1,numtask
692            iacc=ilwork(itkbeg+2)
693            iother=ilwork(itkbeg+3) ! starts from 0
694            numseg=ilwork(itkbeg+4)
695            do j=1,numseg
696               k=ilwork(itkbeg+3+2*j) ! row index
697               if (amg_cfmap(k).ge.lvl) then
698                   tasknnz(i) = tasknnz(i) + 1
699               endif
700            enddo
701            itkbeg=itkbeg+4+2*numseg
702         enddo
703         tasknnztot = sum(tasknnz)
704         !write(*,*)'mcheck ilwork',myrank,lvl,tasknnz
705
706         ! Fill in ilwork array at each lvl
707
708         amg_nlwork(lvl)=tasknnztot+1+2*numtask
709         allocate(amg_ilwork(lvl)%p(tasknnztot+1+2*numtask))
710         amg_ilwork(lvl)%p = 0
711
712         amg_ilwork(lvl)%p(1) = numtask
713         itkbeg = 1
714         kk = 1 ! pointer to amg_ilwork array
715         do i=1,numtask
716            iacc = ilwork(itkbeg+2)
717            iother = ilwork(itkbeg+3)+1
718            if (iacc.eq.0) iother=-iother
719            kk = kk+1
720            amg_ilwork(lvl)%p(kk) = iother  ! first put iother
721            kk = kk+1
722            amg_ilwork(lvl)%p(kk) = tasknnz(i) ! then numseg
723            numseg = ilwork(itkbeg+4)
724            do j=1,numseg
725               k = ilwork(itkbeg+3+2*j)
726               if (amg_cfmap(k).ge.lvl) then
727                   kk = kk+1
728                   amg_ilwork(lvl)%p(kk)=lvlmap(k)
729               endif
730            enddo
731            itkbeg=itkbeg+4+2*numseg
732         enddo
733         !write(fname,'((A9)(I1))')'amgilwork',lvl
734         !call ramg_dump_i(amg_ilwork(lvl)%p,amg_nlwork(lvl),1,fname)
735      enddo
736
737      end subroutine ! ramg_init_ilwork
738
739
740!*******************************************************
741!      ramg_initBCflag: Setup amg_paramap on PPE level(level0)
742!      -1: self, n: neighbour proc number
743!*******************************************************
      subroutine ramg_initBCflag(flag,ilwork,BC,iBC,iper)
!     Set up the PPE-level (level 0) processor map amg_paramap:
!     flag(node) = myrank+1 for interior nodes, +/-(neighbour+1) for
!     nodes shared with a neighbour (sign from the task direction).
!     In serial runs with iamg_reduce>1, flags are instead read from
!     the "<nr>-procs_case" sidecar files to emulate a reduced
!     partitioned case.
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(inout),dimension(nshg)            :: flag

      integer, intent(in), dimension(nlwork)           :: ilwork
      integer, intent(in),dimension(nshg)              :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC)   :: BC

      integer   :: numtask,itag,iacc,iother,numseg,isgbeg,itkbeg
      integer   :: i,j,k,lenseg,nr,nj,mi,mj
      integer,allocatable,dimension(:)    :: neimap
      ! NOTE(review): cname is presumably the external PHASTA
      ! name-suffix function; isgend below is implicitly typed --
      ! confirm common.h supplies implicit integer typing for it
      character*5 cname
      character*255 fname,fname1

      ! default: every node owned by this processor (see note* below)
      flag = myrank+1!note*

      if (numpe.lt.2) then
          IF (iamg_reduce.gt.1) then
          ! this block of code create reduced case
          ! reducemap(i,j): +/-id of the (i,j) processor pair;
          ! rmap1d(1:2,id): (master,slave) ranks of that pair
              nr = iamg_reduce
              allocate(reducemap(nr,nr))
              allocate(rmap1d(2,nr*nr)) ! rmap1d(master,slave,rankid)
              reducemap = 0
              rmap1d = 0
              rmapmax = 1
              do i=1,nr
                 reducemap(i,i) = i
                 rmap1d(1,i) = i
                 rmap1d(2,i) = i
              enddo
              rmapmax = nr+1
              ! read neighbour lists; each neighbour pair gets a
              ! fresh id (sign encodes direction)
              write(fname,"((I1)(A))")nr,'-procs_case/neimap.dat'
              !fname=trim(cname(nr))//'-proc_case/map_nproc.dat'
              fname=trim(fname)
              do i=1,nr
                 fname1 = trim(fname)//cname(i)
                 write(*,*)'mcheck reduced case:',fname1
                 open(264,file=trim(fname1),status='unknown')
                 read(264,*)nj
                 do j=1,nj
                    read(264,*)mi,mj
                    if (mj.gt.0) then
                        reducemap(i,mj) = rmapmax
                        reducemap(mj,i) = -rmapmax
                        write(*,*)'mcheck reducemap,',i,mj,rmapmax
                        rmap1d(1,rmapmax) = i
                        rmap1d(2,rmapmax) = mj
                        rmapmax = rmapmax + 1
                    end if
                 enddo
                 close(264)
              enddo
!              call ramg_dump_i(reducemap,nr,nr,'reducemap ')
              call ramg_dump_ic(rmap1d,2,rmapmax,'rmap1d    ')
              ! assign each node to its pseudo-processor; nodes seen
              ! by two ranks get the pair id from reducemap
              flag = 0
              write(fname,"((I1)(A))")nr,'-procs_case/map_nproc.dat'
              fname=trim(fname)
              do i=nr,1,-1
                 fname1 = trim(fname)//cname(i)
                 open(264,file=trim(fname1),status='unknown')
                 read(264,*)nj
                 do j=1,nj
                    read(264,*)mi,mj
                    if (flag(mj).eq.0) then
                        flag(mj) = i
                    else if (flag(mj).le.nr) then
                        !if ((flag(mj).eq.2).and.(i.eq.4)) then
                      !write(*,*)'mcheck!',mj,reducemap(flag(mj),i)
                        !endif
                        flag(mj) = iabs(reducemap(flag(mj),i))
                    end if
                 enddo
                 close(264)
              enddo
              call ramg_dump_i(flag,nshg,1,'initflagbc')
!              flag = myrank + 1
          ENDIF
          return
      end if

      ! parallel case: mark every shared segment with the signed
      ! neighbour id taken from the ilwork task list
      IF (numpe.gt.1) THEN
      numtask = ilwork(1)
      allocate(neimap(numtask))
      itkbeg = 1
      do i=1,numtask
         iacc = ilwork(itkbeg+2)
         iother = ilwork(itkbeg+3)
         numseg = ilwork(itkbeg+4)
         ! NOTE(review): the slave/master labels here are inverted
         ! relative to ramg_global_lhs, where iacc.eq.0 is commented
         ! as the send/master side -- confirm intended convention;
         ! only the sign of neimap depends on it
         if (iacc.eq.0) then !slave
             neimap(i) = -(iother+1)
         else !master
             neimap(i) = (iother+1)
         endif
         do j=1,numseg
            isgbeg = ilwork(itkbeg+3+j*2)
            lenseg = ilwork(itkbeg+4+j*2)
            isgend = isgbeg + lenseg - 1
            flag(isgbeg:isgend) = neimap(i)!iother+1!note*
         enddo
         itkbeg = itkbeg + 4 + 2*numseg
      enddo

      !call ramg_dump_i(flag,nshg,1,'amgparamap')

      !if (iamg_reduce.gt.0) then
          !call ramg_dump_i(neimap,numtask,1,'neimap    ')
      !endif
      deallocate(neimap)
      ENDIF

      ! note*: +1 to make paramap array range from 1 to numprocs
      ! this will avoid 0.
      ! n=proc indicates neighbour
      ! n=-proc indicates no coarsening necessary or other info


      end subroutine ! ramg_initBCflag
865
866!****************************************************************
867!      commOut_i : commOut for integer arrays, pack commOut
868!****************************************************************
869      subroutine commOut_i(global,ilwork,n,iper,iBC,BC)
870      include "common.h"
871      integer global(nshg,n)
872      real*8  BC(nshg,ndofBC)
873      integer ilwork(nlwork),iper(nshg),iBC(nshg)
874      ! temp array
875      real(kind=8)  aglobal(nshg,n)
876      integer i,j
877      do i=1,nshg
878      do j=1,n
879         aglobal(i,j) = global(i,j)
880      enddo
881      enddo
882      call commOut(aglobal,ilwork,n,iper,iBC,BC)
883      do i=1,nshg
884      do j=1,n
885         global(i,j)=aglobal(i,j)
886      enddo
887      enddo
888      end subroutine ! commOut_i
889
890!****************************************************************
891!      ramg_commOut: commOut for amg array in higher level
892!****************************************************************
      subroutine ramg_commOut(global,level,ilwork,redun,iper,iBC,BC)
!****************************************************************
!     Master->slave ("commOut") update of a coarse-level AMG
!     vector across processor boundaries.  For level 1 this
!     delegates to the standard commOut; for higher levels the
!     exchange is driven by the packed amg_ilwork(level)%p array:
!       p(1)              = number of communication tasks
!       then per task:      (iother, numseg) followed by numseg
!                           local node indices on this level.
!     iother > 0 marks this rank as master for the task (sends);
!     iother < 0 marks it as slave (receives and overwrites).
!     Message tags are taken from the level-1 ilwork task list.
!****************************************************************
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(in)                            :: level
      integer,intent(in) :: redun
      real(kind=8),intent(inout),dimension(amg_nshg(level),redun)
     &           :: global
      integer,intent(in),dimension(nlwork)        :: ilwork
      integer,intent(in),dimension(nshg)            :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

! NOTE(review): the work arrays below are sized by ilwork(1) (the
! level-1 task count) while the loops run to numtask taken from
! amg_ilwork(level)%p(1); this assumes the coarse level never has
! more tasks than level 1 -- confirm against the ilwork builder.
      integer :: numtask,iacc,itag(ilwork(1)),i,j,m,k
      integer :: iother(ilwork(1)),ierr,iotheri
      integer :: numseg(ilwork(1))
      integer :: mstat(MPI_STATUS_SIZE,ilwork(1))
      integer :: req(ilwork(1))
      ! per-task contiguous send/receive buffers (numseg x redun)
      type(r2d),dimension(ilwork(1)) :: tmparray

      ! nothing to exchange in a serial run
      if (numpe.le.1) return

      ! finest level uses the standard (phasta) communication path
      if (level.eq.1) then
         call commOut(global,ilwork,1,iper,iBC,BC)
         return
      endif

      numtask = amg_ilwork(level)%p(1)

      ! NOTE(review): lother is assigned but never read; iacc and k
      ! are declared but unused -- dead remnants, candidates for
      ! removal.
      lother = -1
      ! harvest the MPI tag of each task from the level-1 ilwork
      ! (tags are shared across levels; only the node lists differ)
      itkbeg=1
      do i=1,numtask
         itag(i)=ilwork(itkbeg+1)
         itkbeg=itkbeg+4+2*ilwork(itkbeg+4)
      enddo
      ! walk the packed coarse-level task list: record partner and
      ! segment count, then gather the boundary rows of global into
      ! the task's contiguous buffer (itkbeg advances in place over
      ! the node-index list)
      itkbeg=1
      do i=1,numtask
         iother(i)=amg_ilwork(level)%p(itkbeg+1)
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         allocate(tmparray(i)%p(numseg(i),redun))
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
         tmparray(i)%p(j,:)=global(amg_ilwork(level)%p(itkbeg),:)
         enddo
      enddo

      ! disabled debugging dumps (kept for reference)
      if ((myrank.eq.1).and.(level.eq.2)) then ! debug
!          write(*,*)'mcheck debug ilwork',amg_nlwork(2)
!          call ramg_dump(global,amg_nshg(level),'debuglobal')
!      call ramg_dump_i(amg_ilwork(2)%p,amg_nlwork(2),1,'debugilwok')
!          call ramg_dump(tmparray(1)%p,numseg(1),'debuglvl2 ')
      endif

      ! post nonblocking exchanges: masters send their (owned)
      ! boundary values, slaves receive into the same buffers.
      ! iother is stored 1-based and sign-encoded, hence the
      ! iabs(..)-1 to recover the MPI rank.
      IF (.TRUE.) THEN
      m =0
      do i=1,numtask
         m = m+1
         iotheri = iabs(iother(i))-1
         if (iother(i)>0) then ! master send
!      write(*,*)'mcheck commou send',myrank,iotheri,numseg(i),itag(i)
            call MPI_ISEND(tmparray(i)%p(1,1),redun*numseg(i),
     &      MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &      req(m),ierr)
         else ! slave receive
!      write(*,*)'mcheck commou recv',myrank,iotheri,numseg(i),itag(i)
            call MPI_IRECV(tmparray(i)%p(1,1),redun*numseg(i),
     &      MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &      req(m),ierr)
         endif
      enddo

!      write(*,*)'mcheck commout m,',m

      ! wait for all sends and receives before touching buffers
      call MPI_WAITALL(m,req,mstat,ierr)
      ENDIF

      ! put back
      ! scatter the received buffers back into global; on master
      ! tasks this re-writes the values that were just sent (a
      ! no-op), on slave tasks it installs the master's values.

      itkbeg=1
      do i=1,numtask
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
            global(amg_ilwork(level)%p(itkbeg),:)=tmparray(i)%p(j,:)
         enddo
      enddo

      ! release the per-task buffers
      do i=1,numtask
         deallocate(tmparray(i)%p)
      enddo

      end subroutine ! ramg_commOut
988
      subroutine ramg_commIn(global,level,ilwork,redun,iper,iBC,BC)
!****************************************************************
!     Slave->master ("commIn") accumulation of a coarse-level AMG
!     vector: slaves send their partial boundary contributions,
!     masters receive and ADD them to their own values, and slave
!     boundary entries are zeroed afterwards (the master now holds
!     the complete sum).  Mirror image of ramg_commOut; level 1
!     delegates to the standard commIn.  The packed
!     amg_ilwork(level)%p layout is: p(1)=numtask, then per task
!     (iother, numseg) followed by numseg local node indices;
!     iother > 0 means master, < 0 means slave.  Message tags come
!     from the level-1 ilwork task list.
!****************************************************************
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(in)                            :: level
      integer,intent(in) :: redun
      real(kind=8),intent(inout),dimension(amg_nshg(level),redun)
     &           :: global
      integer,intent(in),dimension(nlwork)        :: ilwork
      integer,intent(in),dimension(nshg)            :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

! NOTE(review): as in ramg_commOut, work arrays are dimensioned by
! ilwork(1) but the loops run to numtask from amg_ilwork(level);
! assumes the coarse level never has more tasks -- confirm.
      integer :: numtask,iacc,itag(ilwork(1)),i,j,m,k
      integer :: iother(ilwork(1)),ierr,iotheri
      integer :: numseg(ilwork(1))
      integer :: mstat(MPI_STATUS_SIZE,ilwork(1))
      integer :: req(ilwork(1))
      ! per-task contiguous send/receive buffers (numseg x redun)
      type(r2d),dimension(ilwork(1)) :: tmparray

      ! nothing to exchange in a serial run
      if (numpe.le.1) return

      ! finest level uses the standard (phasta) communication path
      if (level.eq.1) then
         call commIn(global,ilwork,1,iper,iBC,BC)
         return
      endif

      numtask = amg_ilwork(level)%p(1)

      ! NOTE(review): lother is assigned but never read; iacc and k
      ! are declared but unused.
      lother = -1
      ! harvest the MPI tag of each task from the level-1 ilwork
      itkbeg=1
      do i=1,numtask
         itag(i)=ilwork(itkbeg+1)
         itkbeg=itkbeg+4+2*ilwork(itkbeg+4)
      enddo
      ! gather each task's boundary rows of global into its
      ! contiguous buffer (itkbeg advances in place over the
      ! node-index list)
      itkbeg=1
      do i=1,numtask
         iother(i)=amg_ilwork(level)%p(itkbeg+1)
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         allocate(tmparray(i)%p(numseg(i),redun))
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
         tmparray(i)%p(j,:)=global(amg_ilwork(level)%p(itkbeg),:)
         enddo
      enddo

      ! post nonblocking exchanges -- direction is reversed w.r.t.
      ! ramg_commOut: masters receive, slaves send.  iother is
      ! 1-based and sign-encoded, hence iabs(..)-1 for the rank.
      IF (.TRUE.) THEN
      m =0
      do i=1,numtask
         m = m+1
         iotheri = iabs(iother(i))-1
         if (iother(i)>0) then ! master receive
            call MPI_IRECV(tmparray(i)%p(1,1),redun*numseg(i),
     &      MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &      req(m),ierr)
         else ! slave send
            call MPI_ISEND(tmparray(i)%p(1,1),redun*numseg(i),
     &      MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &      req(m),ierr)
         endif
      enddo

!      write(*,*)'mcheck commout m,',m

      ! wait for all sends and receives before touching buffers
      call MPI_WAITALL(m,req,mstat,ierr)
      ENDIF

      ! put back
      ! masters accumulate the received slave contributions into
      ! their own entries; slave boundary entries are zeroed (their
      ! contribution now lives on the master).

      itkbeg=1
      do i=1,numtask
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
            if (iother(i)>0) then ! master
            global(amg_ilwork(level)%p(itkbeg),:)=
     &      global(amg_ilwork(level)%p(itkbeg),:)+tmparray(i)%p(j,:)
            else
            global(amg_ilwork(level)%p(itkbeg),:)=0
            endif
         enddo
      enddo

      ! release the per-task buffers
      do i=1,numtask
         deallocate(tmparray(i)%p)
      enddo

      end subroutine ! ramg_commIn
1080
1081      subroutine ramg_mapv2g(level,carray,garray)
1082      ! map to level 0 array
1083      use ramg_data
1084      include "common.h"
1085      integer,intent(in)            :: level
1086      real(kind=8),intent(inout),dimension(nshg) :: garray
1087      real(kind=8),intent(in),dimension(amg_nshg(level)) :: carray
1088
1089      integer :: i,j,k
1090      integer,dimension(amg_nshg(level)) :: revmap
1091      revmap = 0
1092      garray(:) = 0
1093      j = 1
1094      do i=1,nshg
1095         if (amg_cfmap(i).ge.level) then
1096             revmap(j) = i
1097             j = j+1
1098         end if
1099      enddo
1100      do i=1,j-1
1101         garray(revmap(i)) = carray(i)
1102      enddo
1103
1104      end subroutine !ramg_mapv2g
1105
1106      subroutine ramg_mapg2v(level,carray,garray)
1107      use ramg_data
1108      include "common.h"
1109      integer,intent(in)            :: level
1110      real(kind=8),intent(in),dimension(nshg) :: garray
1111      real(kind=8),intent(inout),dimension(amg_nshg(level)) ::carray
1112      integer :: i,j,k
1113      carray(:) = 0
1114      j = 1
1115      do i=1,nshg
1116         if (amg_cfmap(i).ge.level) then
1117             carray(j) = garray(i)
1118             j = j+1
1119         end if
1120      enddo
1121      end subroutine !ramg_mapg2v
1122