!**************************************************************
! ramg_global_PPE
! Make lhsP global complete
!**************************************************************

      subroutine ramg_global_lhs(
     &           acolm,arowp,alhsP,annz_tot,
     &           lhsGPcolm,lhsGProwp,lhsGP,
     &           redun,
     &           ilwork,BC,iBC,iper)
!     Assemble a globally completed pressure matrix: the locally owned
!     CSR matrix (acolm/arowp/alhsP, redun values per entry) is merged
!     with the matching sub-matrices exchanged with every neighbouring
!     rank listed in ilwork.  Result is returned in the container
!     types lhsGPcolm/lhsGProwp/lhsGP (allocated here).
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(in) :: annz_tot
      integer,intent(in),dimension(nshg+1) :: acolm
      integer,intent(in),dimension(annz_tot) :: arowp
      integer,intent(in) :: redun
      real(kind=8),intent(in),dimension(redun,annz_tot) :: alhsP
      type(r2d),intent(inout) :: lhsGP
      type(i1d),intent(inout) :: lhsGProwp,lhsGPcolm
      integer,intent(in),dimension(nlwork) :: ilwork
      integer,intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      integer :: numtask,i,j,k,m,p,ki,kj,krowp,ierr
      integer :: gi,gj,gk
      integer :: itag,iacc,iother,numseg,isgbeg,itkbeg
!     NOTE(review): req/stat are sized redun*numpe while up to four
!     requests per task are posted below -- confirm this bound holds
!     for all partitions.
      integer :: stat(MPI_STATUS_SIZE,redun*numpe),req(redun*numpe)
      integer :: mcheck

      real(kind=8) :: swaptemp(redun)

      character fname*10

!     scratch full-length row used while merging sparse rows
      integer,dimension(nshg) :: tmp_rowmap,tmp_revrmap
      real(kind=8),dimension(redun,nshg) :: tmp_rmtx

!     merged (shared) rows kept per vertex until final assembly
      integer,dimension(nshg) :: Pflag
      integer,dimension(nshg+1) :: Pcolm
      type(i2dd) :: Prowp
      type(r12dd) :: Pmtx
      integer :: rownnz

!     Serial run: the local matrix already is the global one.
      if (numpe .le. 1) then
         allocate(lhsGP%p(redun,annz_tot),stat=ierr)
         allocate(lhsGProwp%p(annz_tot))
         allocate(lhsGPcolm%p(nshg+1))
         lhsGP%p(:,:)   = alhsP(:,:)
         lhsGProwp%p(:) = arowp(:)
         lhsGPcolm%p(:) = acolm(:)
         return
      end if

      numtask = ilwork(1)
      m = 0
      itkbeg = 1

      revmap = 0

!     per-task workspace (module storage from ramg_data)
      allocate(sub_map%pp(numtask))
      allocate(sub_revmap%pp(numtask))
      allocate(sub_colm%pp(numtask))
      allocate(sub_colm2%pp(numtask))
      allocate(sub_rowp%pp(numtask))
      allocate(sub_rowp2%pp(numtask))
      allocate(sub_rowpmap%pp(numtask))
      allocate(sub_mtx%pp(numtask))
      allocate(sub_mtx2%pp(numtask))
      allocate(sub_nnz%p(numtask))
      allocate(sub_nnz2%p(numtask))
      allocate(sub_nshg%p(numtask))

!     Phase 1: build each task's sub-matrix column pointer and swap
!     it with the peer rank.
      do i=1,numtask

         m = m+1
!        task header layout as in commu.f
         itag   = ilwork(itkbeg+1)
         iacc   = ilwork(itkbeg+2)
         iother = ilwork(itkbeg+3)
         numseg = ilwork(itkbeg+4)
         isgbeg = ilwork(itkbeg+5)

         sub_nshg%p(i) = numseg

         allocate(sub_map%pp(i)%p(numseg))
         allocate(sub_revmap%pp(i)%p(nshg))

         sub_map%pp(i)%p = 0
         sub_revmap%pp(i)%p = 0

!        segment index -> global vertex, and its inverse
         do j=1,numseg
            k = ilwork(itkbeg+3+2*j)
            sub_map%pp(i)%p(j) = k
            sub_revmap%pp(i)%p(k) = j
         enddo

         allocate(sub_colm%pp(i)%p(numseg+1))
         allocate(sub_colm2%pp(i)%p(numseg+1))

!        count, per shared row, the entries whose column is also
!        shared with this peer
         sub_nnz%p(i) = 0
         do j=1,numseg
            sub_colm%pp(i)%p(j) = sub_nnz%p(i) + 1
            ki = sub_map%pp(i)%p(j)
            do kj = acolm(ki),acolm(ki+1)-1
               krowp = arowp(kj)
               if (sub_revmap%pp(i)%p(krowp).ne.0) then
                  sub_nnz%p(i) = sub_nnz%p(i) + 1
               end if
            enddo
         enddo
         sub_colm%pp(i)%p(numseg+1) = sub_nnz%p(i) + 1

!        exchange the column pointers (iacc selects which side posts
!        the send first; see commu.f for the convention)
         if (iacc.eq.0) then
            call MPI_ISEND(sub_colm%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_IRECV(sub_colm2%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         else if (iacc.eq.1) then
            call MPI_IRECV(sub_colm2%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_ISEND(sub_colm%pp(i)%p(1),numseg+1,MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         end if

         m = m + 1

         itkbeg = itkbeg + 4 + 2*numseg

      enddo

      call MPI_WAITALL(m,req,stat,ierr)

!     nonzero count of every received sub-matrix
      do i=1,numtask
         sub_nnz2%p(i) = sub_colm2%pp(i)%p(sub_nshg%p(i)+1)-1
      enddo

!     Phase 2: fill and exchange row indices and matrix values
      m = 0
      itkbeg = 1
      do i=1,numtask

         m = m+1
         itag   = ilwork(itkbeg+1)
         iacc   = ilwork(itkbeg+2)
         iother = ilwork(itkbeg+3)
         numseg = ilwork(itkbeg+4)
         isgbeg = ilwork(itkbeg+5)

         allocate(sub_rowp%pp(i)%p(sub_nnz%p(i)))
         allocate(sub_rowp2%pp(i)%p(sub_nnz2%p(i)))
         allocate(sub_rowpmap%pp(i)%p(sub_nnz%p(i)))
         allocate(sub_mtx%pp(i)%p(redun,sub_nnz%p(i)))
         allocate(sub_mtx2%pp(i)%p(redun,sub_nnz2%p(i)))

!        gather local row indices (in segment numbering) and values
         k = 0
         do j=1,numseg
            ki = sub_map%pp(i)%p(j)
            do kj = acolm(ki),acolm(ki+1)-1
               krowp = arowp(kj)
               if (sub_revmap%pp(i)%p(krowp).ne.0) then
                  k = k + 1
                  sub_rowp%pp(i)%p(k) = sub_revmap%pp(i)%p(krowp)
                  sub_rowpmap%pp(i)%p(k) = kj
                  do p=1,redun
                     sub_mtx%pp(i)%p(p,k) = alhsP(p,kj)
                  enddo
               end if
            enddo
         enddo

!        exchange row indices
         if (iacc.eq.0) then
            call MPI_ISEND(sub_rowp%pp(i)%p(1),sub_nnz%p(i),
     &           MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_IRECV(sub_rowp2%pp(i)%p(1),sub_nnz2%p(i),
     &           MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         else if (iacc.eq.1) then
            call MPI_IRECV(sub_rowp2%pp(i)%p(1),sub_nnz2%p(i),
     &           MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_ISEND(sub_rowp%pp(i)%p(1),sub_nnz%p(i),
     &           MPI_INTEGER,
     &           iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         end if
         m = m + 2
!        exchange matrix values (same tag; MPI non-overtaking keeps
!        the pairing with the matching recv order)
         if (iacc.eq.0) then
            call MPI_ISEND(sub_mtx%pp(i)%p(1,1),redun*sub_nnz%p(i),
     &           MPI_DOUBLE_PRECISION,
     &           iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_IRECV(sub_mtx2%pp(i)%p(1,1),redun*sub_nnz2%p(i),
     &           MPI_DOUBLE_PRECISION,
     &           iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         else if (iacc.eq.1) then
            call MPI_IRECV(sub_mtx2%pp(i)%p(1,1),redun*sub_nnz2%p(i),
     &           MPI_DOUBLE_PRECISION,
     &           iother,itag,MPI_COMM_WORLD,req(m),ierr)
            call MPI_ISEND(sub_mtx%pp(i)%p(1,1),redun*sub_nnz%p(i),
     &           MPI_DOUBLE_PRECISION,
     &           iother,itag,MPI_COMM_WORLD,req(m+1),ierr)
         end if

         m = m + 1

         itkbeg = itkbeg + 4 + 2*numseg

      enddo

      call MPI_WAITALL(m,req,stat,ierr)

!     disabled debugging hook (matlab dumps of the sub-matrices)
      if (.false.) then
         do i=1,numtask
            write(fname,'((A4)(I1)(A5))')'subP',i,'     '
         enddo
      endif

!     Merge received entries into per-vertex rows
      Pflag = 0
      allocate(Prowp%pp(nshg))
      allocate(Pmtx%pp(nshg))
      Pcolm = 0

      do i=1,numtask
         do j=1,sub_nshg%p(i)

            tmp_rowmap = 0
            tmp_revrmap = 0
            tmp_rmtx = 0
            rownnz = 0

!           expand the current state of this row to full length
            ki = sub_map%pp(i)%p(j)
            if (Pflag(ki).eq.0) then
!              first visit: seed with the local matrix row
               Pflag(ki) = 1
               do k=acolm(ki),acolm(ki+1)-1
                  kj = arowp(k)
                  tmp_rowmap(kj) = k-acolm(ki)+1
                  tmp_revrmap(k-acolm(ki)+1)=kj
                  tmp_rmtx(:,kj) = alhsP(:,k)
               enddo
               rownnz = rownnz+acolm(ki+1)-acolm(ki)
            else
!              already merged once: start from the stored merged row
               do k=1,Pcolm(ki)
                  kj = Prowp%pp(ki)%p(k)
                  tmp_rowmap(kj) = k
                  tmp_revrmap(k) = kj
                  tmp_rmtx(:,kj) = Pmtx%pp(ki)%p(:,k)
               enddo
               rownnz = rownnz+Pcolm(ki)
               deallocate(Prowp%pp(ki)%p)
               deallocate(Pmtx%pp(ki)%p)
            endif

!           accumulate the peer's entries for this row
            do ki = sub_colm2%pp(i)%p(j),sub_colm2%pp(i)%p(j+1)-1
               krowp = sub_rowp2%pp(i)%p(ki)
               kj = sub_map%pp(i)%p(krowp)
               if (tmp_rowmap(kj).eq.0) then
                  rownnz = rownnz+1
                  tmp_rowmap(kj) = rownnz
                  tmp_revrmap(rownnz) = kj
               endif
               do p=1,redun
                  tmp_rmtx(p,kj) = tmp_rmtx(p,kj)
     &                            +sub_mtx2%pp(i)%p(p,ki)
               enddo
            enddo

!           compress the full row back to sparse storage
            ki = sub_map%pp(i)%p(j)
            Pcolm(ki) = rownnz
            allocate(Prowp%pp(ki)%p(rownnz))
            allocate(Pmtx%pp(ki)%p(redun,rownnz))
            do k=1,rownnz
               kj = tmp_revrmap(k)
               Prowp%pp(ki)%p(k) = kj
               Pmtx%pp(ki)%p(:,k) = tmp_rmtx(:,kj)
            enddo

!           sort the row by column index (simple exchange sort;
!           rows are short)
            do gi=1,rownnz
               do gj=gi+1,rownnz
                  if (Prowp%pp(ki)%p(gi).gt.Prowp%pp(ki)%p(gj)) then
                     gk = Prowp%pp(ki)%p(gj)
                     Prowp%pp(ki)%p(gj) = Prowp%pp(ki)%p(gi)
                     Prowp%pp(ki)%p(gi) = gk
                     swaptemp(:) = Pmtx%pp(ki)%p(:,gj)
                     Pmtx%pp(ki)%p(:,gj) = Pmtx%pp(ki)%p(:,gi)
                     Pmtx%pp(ki)%p(:,gi) = swaptemp(:)
                  endif
               enddo
            enddo

         enddo
      enddo

!     Assemble the global CSR structure: column pointer first
      allocate(lhsGPcolm%p(nshg+1))

      rownnz = 0
      k = 0
      lhsGPcolm%p(1) = 1
      do i=1,nshg
         if (Pflag(i).eq.0) then
!           untouched row: keep the original lhsP row length
            rownnz = rownnz+acolm(i+1)-acolm(i)
         else
!           merged row
            rownnz = rownnz+Pcolm(i)
            k = k+1
         endif
         lhsGPcolm%p(i+1)=rownnz+1
      enddo

      allocate(lhsGProwp%p(rownnz))
      allocate(lhsGP%p(redun,rownnz))

!     then row indices and values
      rownnz = 1
      do i=1,nshg
         if (Pflag(i).eq.0) then
            do j=acolm(i),acolm(i+1)-1
               lhsGProwp%p(rownnz) = arowp(j)
               lhsGP%p(:,rownnz) = alhsP(:,j)
               rownnz = rownnz + 1
            enddo
         else
            do j=1,Pcolm(i)
               lhsGProwp%p(rownnz) = Prowp%pp(i)%p(j)
               lhsGP%p(:,rownnz) = Pmtx%pp(i)%p(:,j)
               rownnz = rownnz + 1
            enddo
         endif
      enddo

!     For the PPE (redun==1) make the diagonal the first entry of
!     every row
      if (redun.eq.1) then
         loop_i: do i=1,nshg
            gi = lhsGPcolm%p(i)
            if (lhsGProwp%p(gi).ne.i) then
               do j=gi+1,lhsGPcolm%p(i+1)-1
                  k = lhsGProwp%p(j)
                  if (k.eq.i) then
                     lhsGProwp%p(j) = lhsGProwp%p(gi)
                     lhsGProwp%p(gi) = i
                     swaptemp(:) = lhsGP%p(:,gi)
                     lhsGP%p(:,gi) = lhsGP%p(:,j)
                     lhsGP%p(:,j) = swaptemp(:)
                     cycle loop_i
                  endif
               enddo
            endif
         enddo loop_i
      endif

!     PPE consistency sweep (the corrective assignment is currently
!     disabled; the loop is kept as in the original)
      if (redun.eq.1) then
         do i=1,nshg
            do j=lhsGPcolm%p(i),lhsGPcolm%p(i+1)-1
               k = lhsGProwp%p(j)
               gi = amg_paramap(1)%p(i)
               gk = amg_paramap(1)%p(k)
               if ((gi.eq.gk).and.(gi.lt.0) ) then
               endif
            enddo
         enddo
      endif

!     release the merged-row storage
      do i=1,nshg
         if (Pflag(i) .eq. 1) then
            deallocate(Prowp%pp(i)%p)
            deallocate(Pmtx%pp(i)%p)
         endif
      enddo

      deallocate(Prowp%pp)
      deallocate(Pmtx%pp)

!     release the per-task workspace
      do i=1,numtask
         deallocate(sub_map%pp(i)%p)
         deallocate(sub_revmap%pp(i)%p)
         deallocate(sub_colm%pp(i)%p)
         deallocate(sub_colm2%pp(i)%p)
         deallocate(sub_rowp%pp(i)%p)
         deallocate(sub_rowp2%pp(i)%p)
         deallocate(sub_rowpmap%pp(i)%p)
         deallocate(sub_mtx%pp(i)%p)
         deallocate(sub_mtx2%pp(i)%p)
      enddo

      deallocate(sub_map%pp)
      deallocate(sub_revmap%pp)
      deallocate(sub_rowpmap%pp)
      deallocate(sub_nnz%p)
      deallocate(sub_nnz2%p)
      deallocate(sub_nshg%p)
      deallocate(sub_colm%pp)
      deallocate(sub_colm2%pp)
      deallocate(sub_rowp%pp)
      deallocate(sub_rowp2%pp)
      deallocate(sub_mtx%pp)
      deallocate(sub_mtx2%pp)

      end subroutine ! ramg_global_lhs

!*******************************************************************
! ramg_PPEAp
! produce parallel A-p product for PPE correctly
! 
! q = PPE * p without scaling
!*******************************************************************
      subroutine ramg_PPEAp(q,p,
     &           colm,rowp,lhsK,lhsP,
     &           ilwork,BC,iBC,iper)
!     Apply the (parallel) pressure-Poisson operator to vector p and
!     return the product in q, using the stored flow diagonal
!     (ramg_flowDiag) for the K^{-1} approximation.
      use ramg_data
      include "common.h"

      real(kind=8),intent(in),dimension(nshg) :: p
      real(kind=8),intent(inout),dimension(nshg) :: q
      integer,intent(in),dimension(nshg+1) :: colm
      integer,intent(in),dimension(nnz_tot) :: rowp
      real(kind=8),intent(in),dimension(9,nnz_tot) :: lhsK
      real(kind=8),intent(in),dimension(4,nnz_tot) :: lhsP
      integer,intent(in),dimension(nlwork) :: ilwork
      integer,intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      real(kind=8),dimension(nshg,1) :: t1,t1a
      real(kind=8),dimension(nshg,3) :: t2,t2a
      real(kind=8),dimension(nshg,4) :: t2b
      real(kind=8),dimension(nshg,4) :: diag

      diag = ramg_flowDiag%p

!     pre-scale p by the PPE diagonal and the pressure diagonal
      call fMtxVdimVecMult(p,amg_ppeDiag(1)%p,t1a,1,1,1,1,nshg)
      call fMtxVdimVecMult(t1a,diag(:,4),t1,1,1,1,1,nshg)
!     gradient:  t2 = G * t1
      call commOut(t1,ilwork,1,iper,iBC,BC)
      call fLesSparseApG(colm,rowp,lhsP,t1,t2,nshg,nnz_tot)
      call commIn(t2,ilwork,3,iper,iBC,BC)
!     approximate K^{-1} via two diagonal scalings
!     (note: argument order differs from lestools.c's 3,4,4,3)
      call fMtxVdimVecMult(t2,diag,t2a,3,4,3,3,nshg)
      call fMtxVdimVecMult(t2a,diag,t2,3,4,3,3,nshg)
!     divergence plus C block:  t1 = -G^T t2 + C t1
      t2b(:,1:3) = -t2(:,1:3)
      t2b(:,4) = t1(:,1)
      call commOut(t2b,ilwork,4,iper,iBC,BC)
      call fLesSparseApNGtC(colm,rowp,lhsP,t2b,t1,nshg,nnz_tot)
      call commIn(t1,ilwork,1,iper,iBC,BC)
!     post-scale to form q
      call fMtxVdimVecMult(t1,diag(:,4),t1a,1,1,1,1,nshg)
      call fMtxVdimVecMult(t1a,amg_ppeDiag(1)%p,q,1,1,1,1,nshg)

      end subroutine ! ramg_PPEAp

!*******************************************************************
! ramg_PPEAps
! produce parallel A-p product for PPE correctly
! q = PPE * p, WITH SCALING!
!*******************************************************************
      subroutine ramg_PPEAps(q,p,diag,
     &           colm,rowp,lhsK,lhsP,
     &           ilwork,BC,iBC,iper)
!     Same operator as ramg_PPEAp, but the flow diagonal used for the
!     K^{-1} approximation is supplied by the caller (diag) instead of
!     being taken from ramg_flowDiag.
      use ramg_data
      include "common.h"

      real(kind=8),intent(in),dimension(nshg) :: p
      real(kind=8),intent(inout),dimension(nshg) :: q
      real(kind=8),intent(in),dimension(nshg,4) :: diag
      integer,intent(in),dimension(nshg+1) :: colm
      integer,intent(in),dimension(nnz_tot) :: rowp
      real(kind=8),intent(in),dimension(9,nnz_tot) :: lhsK
      real(kind=8),intent(in),dimension(4,nnz_tot) :: lhsP
      integer,intent(in),dimension(nlwork) :: ilwork
      integer,intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      real(kind=8),dimension(nshg,1) :: t1,t1a
      real(kind=8),dimension(nshg,3) :: t2,t2a
      real(kind=8),dimension(nshg,4) :: t2b

!     pre-scale p
      call fMtxVdimVecMult(p,amg_ppeDiag(1)%p,t1a,1,1,1,1,nshg)
      call fMtxVdimVecMult(t1a,diag(:,4),t1,1,1,1,1,nshg)
!     gradient:  t2 = G * t1
      call commOut(t1,ilwork,1,iper,iBC,BC)
      call fLesSparseApG(colm,rowp,lhsP,t1,t2,nshg,nnz_tot)
      call commIn(t2,ilwork,3,iper,iBC,BC)
!     approximate K^{-1} via two diagonal scalings
!     (note: argument order differs from lestools.c's 3,4,4,3)
      call fMtxVdimVecMult(t2,diag,t2a,3,4,3,3,nshg)
      call fMtxVdimVecMult(t2a,diag,t2,3,4,3,3,nshg)
!     divergence plus C block:  t1 = -G^T t2 + C t1
      t2b(:,1:3) = -t2(:,1:3)
      t2b(:,4) = t1(:,1)
      call commOut(t2b,ilwork,4,iper,iBC,BC)
      call fLesSparseApNGtC(colm,rowp,lhsP,t2b,t1,nshg,nnz_tot)
      call commIn(t1,ilwork,1,iper,iBC,BC)
!     post-scale to form q
      call fMtxVdimVecMult(t1,diag(:,4),t1a,1,1,1,1,nshg)
      call fMtxVdimVecMult(t1a,amg_ppeDiag(1)%p,q,1,1,1,1,nshg)

      end subroutine ! ramg_PPEAps

!*******************************************************************
! ramg_PPErhs
! 
! produce a globally correct rhs
!*******************************************************************
      subroutine ramg_PPErhs(rhsp,rhsg,diag,
     &           colm,rowp,lhsK,lhsP,
     &           ilwork,BC,iBC,iper)
!     Build the globally consistent PPE right-hand side rhsp from the
!     4-component flow residual rhsg (momentum in 1:3, continuity
!     in 4), applying the same diagonal scalings as ramg_PPEAps.
      use ramg_data
      include "common.h"

      real(kind=8),intent(inout),dimension(nshg) :: rhsp
      real(kind=8),intent(in),dimension(nshg,4) :: rhsg
      real(kind=8),intent(in),dimension(nshg,4) :: diag
      integer,intent(in),dimension(nshg+1) :: colm
      integer,intent(in),dimension(nnz_tot) :: rowp
      real(kind=8),intent(in),dimension(9,nnz_tot) :: lhsK
      real(kind=8),intent(in),dimension(4,nnz_tot) :: lhsP
      integer,intent(in),dimension(nlwork) :: ilwork
      integer,intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      real(kind=8),dimension(nshg,1) :: t1
      real(kind=8),dimension(nshg,3) :: t2,t2a

!     split the residual and make both parts globally consistent
      t2 = rhsg(:,1:3)
      t1 = rhsg(:,4:4)
      call commIn(t1,ilwork,1,iper,iBC,BC)
      call commOut(t1,ilwork,1,iper,iBC,BC)
      call commIn(t2,ilwork,3,iper,iBC,BC)
      call commOut(t2,ilwork,3,iper,iBC,BC)
!     approximate K^{-1} on the momentum part
      call fMtxVdimVecMult(t2,diag,t2a,3,4,3,3,nshg)
      call fMtxVdimVecMult(t2a,diag,t2,3,4,3,3,nshg)
!     rhsp = G^T t2, then make it consistent and zero masked entries
      call commOut(t2,ilwork,3,iper,iBC,BC)
      call fLesSparseApNGt(colm,rowp,lhsP,t2,rhsp,nshg,nnz_tot)
      call commIn(rhsp,ilwork,1,iper,iBC,BC)
      call commOut(rhsp,ilwork,1,iper,iBC,BC)
      call ramg_zeroOut(rhsp,ilwork,BC,iBC,iper)
!     combine: -G^T ... + Rc  (sign folded into the subtraction)
      rhsp = rhsp - t1(:,1)
!     final diagonal scaling
      call fMtxVdimVecMult(rhsp,diag(:,4),t1,1,1,1,1,nshg)
      rhsp = t1(:,1)

      end subroutine ! ramg_PPErhs

!********************************************************
! ramg_init_ilwork(ilwork,BC,iBC,iper)
! Initialize amg_ilwork(level)%p(:) array
! For each level, same structure as ilwork
! 
!********************************************************
      subroutine ramg_init_ilwork(ilwork,BC,iBC,iper)
!     Build, for every AMG level, a compact communication table
!     amg_ilwork(lvl)%p holding: (1) the task count, then per task
!     the signed peer id and segment count, followed by the
!     coarse-level indices of the shared vertices (amg_cfmap(k)>=lvl).
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"
      integer, intent(in), dimension(nlwork) :: ilwork
      integer, intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      integer :: lvl,i,j,iacc,iother,numseg,numtask,itkbeg,sigi,k
      integer :: tasknnztot,itask,amgseg
      integer,dimension(numpe) :: revp
      integer,dimension(ilwork(1)) :: tasknnz,iotherl
      type(i1d),dimension(ilwork(1)) :: taskfill

!     lvlmap(base index) = coarse-level index (0 if not on the level)
      integer,dimension(nshg) :: lvlmap

      character :: fname*80

      if (numpe.le.1) return

      if (ramg_setup_flag.ne.0) return

      numtask = ilwork(1)

      do lvl = 1,ramg_levelx

!        base -> coarse level index map
         lvlmap=0
         k = 0
         do i=1,nshg
            if (amg_cfmap(i).ge.lvl) then
               k = k+1
               lvlmap(i) = k
            endif
         enddo

!        count, per task, the shared vertices present on this level
         itkbeg=1
         tasknnz = 0
         do i=1,numtask
            iacc=ilwork(itkbeg+2)
            iother=ilwork(itkbeg+3)
            numseg=ilwork(itkbeg+4)
            do j=1,numseg
               k=ilwork(itkbeg+3+2*j)
               if (amg_cfmap(k).ge.lvl) then
                  tasknnz(i) = tasknnz(i) + 1
               endif
            enddo
            itkbeg=itkbeg+4+2*numseg
         enddo
         tasknnztot = sum(tasknnz)

!        fill the compact table for this level
         amg_nlwork(lvl)=tasknnztot+1+2*numtask
         allocate(amg_ilwork(lvl)%p(tasknnztot+1+2*numtask))
         amg_ilwork(lvl)%p = 0

         amg_ilwork(lvl)%p(1) = numtask
         itkbeg = 1
         kk = 1
         do i=1,numtask
            iacc = ilwork(itkbeg+2)
!           peer id stored 1-based; sign encodes the iacc role
            iother = ilwork(itkbeg+3)+1
            if (iacc.eq.0) iother=-iother
            kk = kk+1
            amg_ilwork(lvl)%p(kk) = iother
            kk = kk+1
            amg_ilwork(lvl)%p(kk) = tasknnz(i)
            numseg = ilwork(itkbeg+4)
            do j=1,numseg
               k = ilwork(itkbeg+3+2*j)
               if (amg_cfmap(k).ge.lvl) then
                  kk = kk+1
                  amg_ilwork(lvl)%p(kk)=lvlmap(k)
               endif
            enddo
            itkbeg=itkbeg+4+2*numseg
         enddo
      enddo

      end subroutine ! ramg_init_ilwork


!*******************************************************
! ramg_initBCflag: Setup amg_paramap on PPE level(level0)
! -1: self, n: neighbour proc number
!*******************************************************
      subroutine ramg_initBCflag(flag,ilwork,BC,iBC,iper)
!     Mark every vertex with an owner/neighbour code: myrank+1 for
!     interior vertices, +/-(peer+1) for vertices shared through a
!     communication task.  In serial runs with iamg_reduce>1 the
!     neighbour layout of an nr-processor case is reconstructed from
!     dumped map files instead.
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(inout),dimension(nshg) :: flag

      integer, intent(in), dimension(nlwork) :: ilwork
      integer, intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      integer :: numtask,itag,iacc,iother,numseg,isgbeg,itkbeg
      integer :: i,j,k,lenseg,nr,nj,mi,mj
      integer,allocatable,dimension(:) :: neimap
      character*5 cname
      character*255 fname,fname1

!     default: owned by this rank (+1 keeps the code nonzero)
      flag = myrank+1

      if (numpe.lt.2) then
         IF (iamg_reduce.gt.1) then
!           serial "reduced case": rebuild nr-processor neighbour info
            nr = iamg_reduce
            allocate(reducemap(nr,nr))
            allocate(rmap1d(2,nr*nr))
            reducemap = 0
            rmap1d = 0
            rmapmax = 1
            do i=1,nr
               reducemap(i,i) = i
               rmap1d(1,i) = i
               rmap1d(2,i) = i
            enddo
            rmapmax = nr+1
            write(fname,"((I1)(A))")nr,'-procs_case/neimap.dat'
            fname=trim(fname)
            do i=1,nr
               fname1 = trim(fname)//cname(i)
               write(*,*)'mcheck reduced case:',fname1
               open(264,file=trim(fname1),status='unknown')
               read(264,*)nj
               do j=1,nj
                  read(264,*)mi,mj
                  if (mj.gt.0) then
!                    register the (i,mj) neighbour pair
                     reducemap(i,mj) = rmapmax
                     reducemap(mj,i) = -rmapmax
                     write(*,*)'mcheck reducemap,',i,mj,rmapmax
                     rmap1d(1,rmapmax) = i
                     rmap1d(2,rmapmax) = mj
                     rmapmax = rmapmax + 1
                  end if
               enddo
               close(264)
            enddo
            call ramg_dump_ic(rmap1d,2,rmapmax,'rmap1d    ')
!           now paint vertex ownership from the partition map files
            flag = 0
            write(fname,"((I1)(A))")nr,'-procs_case/map_nproc.dat'
            fname=trim(fname)
            do i=nr,1,-1
               fname1 = trim(fname)//cname(i)
               open(264,file=trim(fname1),status='unknown')
               read(264,*)nj
               do j=1,nj
                  read(264,*)mi,mj
                  if (flag(mj).eq.0) then
                     flag(mj) = i
                  else if (flag(mj).le.nr) then
!                    vertex seen twice: mark with the pair code
                     flag(mj) = iabs(reducemap(flag(mj),i))
                  end if
               enddo
               close(264)
            enddo
            call ramg_dump_i(flag,nshg,1,'initflagbc')
         ENDIF
         return
      end if

      IF (numpe.gt.1) THEN
         numtask = ilwork(1)
         allocate(neimap(numtask))
         itkbeg = 1
         do i=1,numtask
            iacc = ilwork(itkbeg+2)
            iother = ilwork(itkbeg+3)
            numseg = ilwork(itkbeg+4)
!           sign of the stored peer id encodes the iacc role
            if (iacc.eq.0) then
               neimap(i) = -(iother+1)
            else
               neimap(i) = (iother+1)
            endif
            do j=1,numseg
               isgbeg = ilwork(itkbeg+3+j*2)
               lenseg = ilwork(itkbeg+4+j*2)
               isgend = isgbeg + lenseg - 1
               flag(isgbeg:isgend) = neimap(i)
            enddo
            itkbeg = itkbeg + 4 + 2*numseg
         enddo

         deallocate(neimap)
      ENDIF

      ! note*: +1 to make paramap array range from 1 to numprocs
      !   this will avoid 0.
      !   n=proc indicates neighbour
      !   n=-proc indicates no coarsening necessary or other info

      end subroutine ! ramg_initBCflag

!****************************************************************
! commOut_i : commOut for integer arrays, pack commOut
!****************************************************************
      subroutine commOut_i(global,ilwork,n,iper,iBC,BC)
!     Integer wrapper around commOut: round-trips the data through a
!     real(8) buffer because commOut only handles real arrays.
      include "common.h"
      integer global(nshg,n)
      real*8 BC(nshg,ndofBC)
      integer ilwork(nlwork),iper(nshg),iBC(nshg)
      real(kind=8) aglobal(nshg,n)
      integer i,j
      aglobal = global
      call commOut(aglobal,ilwork,n,iper,iBC,BC)
      global = aglobal
      end subroutine ! commOut_i

!****************************************************************
! 
! ramg_commOut: commOut for amg array in higher level
!****************************************************************
      subroutine ramg_commOut(global,level,ilwork,redun,iper,iBC,BC)
!     Push the master-side values of the shared coarse-level vertices
!     to the neighbouring ranks, using the compact per-level table
!     built by ramg_init_ilwork.  Level 1 falls back to the flow
!     solver's commOut.
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(in) :: level
      integer,intent(in) :: redun
      real(kind=8),intent(inout),dimension(amg_nshg(level),redun)
     &           :: global
      integer,intent(in),dimension(nlwork) :: ilwork
      integer,intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      integer :: numtask,iacc,itag(ilwork(1)),i,j,m,k
      integer :: iother(ilwork(1)),ierr,iotheri
      integer :: numseg(ilwork(1))
      integer :: mstat(MPI_STATUS_SIZE,ilwork(1))
      integer :: req(ilwork(1))
      type(r2d),dimension(ilwork(1)) :: tmparray

      if (numpe.le.1) return

      if (level.eq.1) then
         call commOut(global,ilwork,1,iper,iBC,BC)
         return
      endif

      numtask = amg_ilwork(level)%p(1)

      lother = -1
!     tags are taken from the base-level ilwork tasks
      itkbeg=1
      do i=1,numtask
         itag(i)=ilwork(itkbeg+1)
         itkbeg=itkbeg+4+2*ilwork(itkbeg+4)
      enddo
!     pack the shared coarse vertices into one buffer per task
      itkbeg=1
      do i=1,numtask
         iother(i)=amg_ilwork(level)%p(itkbeg+1)
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         allocate(tmparray(i)%p(numseg(i),redun))
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
            tmparray(i)%p(j,:)=global(amg_ilwork(level)%p(itkbeg),:)
         enddo
      enddo

!     the sign of iother selects the direction of the transfer
      m = 0
      do i=1,numtask
         m = m+1
         iotheri = iabs(iother(i))-1
         if (iother(i)>0) then
            call MPI_ISEND(tmparray(i)%p(1,1),redun*numseg(i),
     &           MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &           req(m),ierr)
         else
            call MPI_IRECV(tmparray(i)%p(1,1),redun*numseg(i),
     &           MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &           req(m),ierr)
         endif
      enddo

      call MPI_WAITALL(m,req,mstat,ierr)

!     unpack: overwrite local values with the received ones
      itkbeg=1
      do i=1,numtask
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
            global(amg_ilwork(level)%p(itkbeg),:)=tmparray(i)%p(j,:)
         enddo
      enddo

      do i=1,numtask
         deallocate(tmparray(i)%p)
      enddo

      end subroutine ! ramg_commOut

      subroutine ramg_commIn(global,level,ilwork,redun,iper,iBC,BC)
!     Accumulate slave-side contributions of the shared coarse-level
!     vertices onto the master rank (masters receive and add, slaves
!     send and are zeroed), mirroring the flow solver's commIn.
      use ramg_data
      include "common.h"
      include "mpif.h"
      include "auxmpi.h"

      integer,intent(in) :: level
      integer,intent(in) :: redun
      real(kind=8),intent(inout),dimension(amg_nshg(level),redun)
     &           :: global
      integer,intent(in),dimension(nlwork) :: ilwork
      integer,intent(in),dimension(nshg) :: iBC,iper
      real(kind=8),intent(in),dimension(nshg,ndofBC) :: BC

      integer :: numtask,iacc,itag(ilwork(1)),i,j,m,k
      integer :: iother(ilwork(1)),ierr,iotheri
      integer :: numseg(ilwork(1))
      integer :: mstat(MPI_STATUS_SIZE,ilwork(1))
      integer :: req(ilwork(1))
      type(r2d),dimension(ilwork(1)) :: tmparray

      if (numpe.le.1) return

      if (level.eq.1) then
         call commIn(global,ilwork,1,iper,iBC,BC)
         return
      endif

      numtask = amg_ilwork(level)%p(1)

      lother = -1
!     tags are taken from the base-level ilwork tasks
      itkbeg=1
      do i=1,numtask
         itag(i)=ilwork(itkbeg+1)
         itkbeg=itkbeg+4+2*ilwork(itkbeg+4)
      enddo
!     pack the shared coarse vertices into one buffer per task
      itkbeg=1
      do i=1,numtask
         iother(i)=amg_ilwork(level)%p(itkbeg+1)
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         allocate(tmparray(i)%p(numseg(i),redun))
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
            tmparray(i)%p(j,:)=global(amg_ilwork(level)%p(itkbeg),:)
         enddo
      enddo

!     masters (iother>0) receive, slaves send
      m = 0
      do i=1,numtask
         m = m+1
         iotheri = iabs(iother(i))-1
         if (iother(i)>0) then
            call MPI_IRECV(tmparray(i)%p(1,1),redun*numseg(i),
     &           MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &           req(m),ierr)
         else
            call MPI_ISEND(tmparray(i)%p(1,1),redun*numseg(i),
     &           MPI_DOUBLE_PRECISION,iotheri,itag(i),MPI_COMM_WORLD,
     &           req(m),ierr)
         endif
      enddo

      call MPI_WAITALL(m,req,mstat,ierr)

!     unpack: masters accumulate, slaves reset their copy to zero
      itkbeg=1
      do i=1,numtask
         numseg(i)=amg_ilwork(level)%p(itkbeg+2)
         itkbeg=itkbeg+2
         do j=1,numseg(i)
            itkbeg=itkbeg+1
            if (iother(i)>0) then
               global(amg_ilwork(level)%p(itkbeg),:)=
     &            global(amg_ilwork(level)%p(itkbeg),:)
     &            +tmparray(i)%p(j,:)
            else
               global(amg_ilwork(level)%p(itkbeg),:)=0
            endif
         enddo
      enddo

      do i=1,numtask
         deallocate(tmparray(i)%p)
      enddo

      end subroutine ! ramg_commIn

      subroutine ramg_mapv2g(level,carray,garray)
!     Scatter a coarse-level vector (carray) back to the base-level
!     layout (garray); entries not present on the level become zero.
      use ramg_data
      include "common.h"
      integer,intent(in) :: level
      real(kind=8),intent(inout),dimension(nshg) :: garray
      real(kind=8),intent(in),dimension(amg_nshg(level)) :: carray

      integer :: i,j,k
      integer,dimension(amg_nshg(level)) :: revmap

      revmap = 0
      garray(:) = 0
!     build coarse -> base index map, then scatter
      j = 1
      do i=1,nshg
         if (amg_cfmap(i).ge.level) then
            revmap(j) = i
            j = j+1
         end if
      enddo
      do i=1,j-1
         garray(revmap(i)) = carray(i)
      enddo

      end subroutine !ramg_mapv2g

      subroutine ramg_mapg2v(level,carray,garray)
!     Gather the base-level vector (garray) into the coarse-level
!     layout (carray), keeping only vertices with amg_cfmap>=level.
      use ramg_data
      include "common.h"
      integer,intent(in) :: level
      real(kind=8),intent(in),dimension(nshg) :: garray
      real(kind=8),intent(inout),dimension(amg_nshg(level)) ::carray
      integer :: i,j,k
      carray(:) = 0
      j = 1
      do i=1,nshg
         if (amg_cfmap(i).ge.level) then
            carray(j) = garray(i)
            j = j+1
         end if
      enddo
      end subroutine !ramg_mapg2v