1 2 /***********************************comm.c************************************* 3 SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 11.21.97 16 ***********************************comm.c*************************************/ 17 18 /***********************************comm.c************************************* 19 File Description: 20 ----------------- 21 22 ***********************************comm.c*************************************/ 23 #include "petsc.h" 24 25 #include "const.h" 26 #include "types.h" 27 #include "error.h" 28 #include "ivec.h" 29 #include "bss_malloc.h" 30 #include "comm.h" 31 32 33 /* global program control variables - explicitly exported */ 34 int my_id = 0; 35 int num_nodes = 1; 36 int floor_num_nodes = 0; 37 int i_log2_num_nodes = 0; 38 39 /* global program control variables */ 40 static int p_init = 0; 41 static int modfl_num_nodes; 42 static int edge_not_pow_2; 43 44 static unsigned int edge_node[INT_LEN*32]; 45 46 /***********************************comm.c************************************* 47 Function: giop() 48 49 Input : 50 Output: 51 Return: 52 Description: 53 ***********************************comm.c*************************************/ 54 void 55 comm_init (void) 56 { 57 #ifdef DEBUG 58 error_msg_warning("c_init() :: start\n"); 59 #endif 60 61 if (p_init++) return; 62 63 #if PETSC_SIZEOF_INT != 4 64 error_msg_warning("Int != 4 Bytes!"); 65 #endif 66 67 68 MPI_Comm_size(MPI_COMM_WORLD,&num_nodes); 69 MPI_Comm_rank(MPI_COMM_WORLD,&my_id); 70 71 if (num_nodes> (INT_MAX >> 1)) 72 {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");} 73 74 ivec_zero((int*)edge_node,INT_LEN*32); 75 76 floor_num_nodes = 1; 77 i_log2_num_nodes = modfl_num_nodes = 0; 78 while (floor_num_nodes <= num_nodes) 79 { 80 edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes; 81 floor_num_nodes <<= 1; 82 i_log2_num_nodes++; 83 } 84 85 i_log2_num_nodes--; 86 floor_num_nodes >>= 1; 87 modfl_num_nodes = (num_nodes - floor_num_nodes); 88 89 if ((my_id > 0) && (my_id <= modfl_num_nodes)) 90 {edge_not_pow_2=((my_id|floor_num_nodes)-1);} 91 else if (my_id >= floor_num_nodes) 92 {edge_not_pow_2=((my_id^floor_num_nodes)+1); 93 } 94 else 95 {edge_not_pow_2 = 0;} 96 97 #ifdef DEBUG 98 error_msg_warning("c_init() done :: my_id=%d, num_nodes=%D",my_id,num_nodes); 99 #endif 100 } 101 102 103 104 /***********************************comm.c************************************* 105 Function: giop() 106 107 Input : 108 Output: 109 Return: 110 Description: fan-in/out version 111 ***********************************comm.c*************************************/ 112 void 113 giop(int *vals, int *work, int n, int *oprs) 114 { 115 register int mask, edge; 116 int type, dest; 117 vfp fp; 118 MPI_Status status; 119 120 121 #ifdef SAFE 122 /* ok ... should have some data, work, and operator(s) */ 123 if (!vals||!work||!oprs) 124 {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 125 126 /* non-uniform should have at least two entries */ 127 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 128 {error_msg_fatal("giop() :: non_uniform and n=0,1?");} 129 130 /* check to make sure comm package has been initialized */ 131 if (!p_init) 132 {comm_init();} 133 #endif 134 135 /* if there's nothing to do return */ 136 if ((num_nodes<2)||(!n)) 137 { 138 #ifdef DEBUG 139 error_msg_warning("giop() :: n=%D num_nodes=%d",n,num_nodes); 140 #endif 141 return; 142 } 143 144 /* a negative number if items to send ==> fatal */ 145 if (n<0) 146 {error_msg_fatal("giop() :: n=%D<0?",n);} 147 148 /* advance to list of n operations for custom */ 149 if ((type=oprs[0])==NON_UNIFORM) 150 {oprs++;} 151 152 /* major league hack */ 153 if (!(fp = (vfp) ivec_fct_addr(type))) { 154 error_msg_warning("giop() :: hope you passed in a rbfp!\n"); 155 fp = (vfp) oprs; 156 } 157 158 /* all msgs will be of the same length */ 159 /* if not a hypercube must colapse partial dim */ 160 if (edge_not_pow_2) 161 { 162 if (my_id >= floor_num_nodes) 163 {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);} 164 else 165 { 166 MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 167 MPI_COMM_WORLD,&status); 168 (*fp)(vals,work,n,oprs); 169 } 170 } 171 172 /* implement the mesh fan in/out exchange algorithm */ 173 if (my_id<floor_num_nodes) 174 { 175 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 176 { 177 dest = my_id^mask; 178 if (my_id > dest) 179 {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 180 else 181 { 182 MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest, 183 MPI_COMM_WORLD, &status); 184 (*fp)(vals, work, n, oprs); 185 } 186 } 187 188 mask=floor_num_nodes>>1; 189 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 190 { 191 if (my_id%mask) 192 {continue;} 193 194 dest = my_id^mask; 195 if (my_id < dest) 196 {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 197 else 198 { 199 MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest, 200 MPI_COMM_WORLD, &status); 201 } 202 } 203 } 204 205 /* if not a hypercube must expand to partial dim */ 206 if (edge_not_pow_2) 207 { 208 if (my_id >= floor_num_nodes) 209 { 210 MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 211 MPI_COMM_WORLD,&status); 212 } 213 else 214 {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);} 215 } 216 } 217 218 /***********************************comm.c************************************* 219 Function: grop() 220 221 Input : 222 Output: 223 Return: 224 Description: fan-in/out version 225 ***********************************comm.c*************************************/ 226 void 227 grop(REAL *vals, REAL *work, int n, int *oprs) 228 { 229 register int mask, edge; 230 int type, dest; 231 vfp fp; 232 MPI_Status status; 233 234 235 #ifdef SAFE 236 /* ok ... should have some data, work, and operator(s) */ 237 if (!vals||!work||!oprs) 238 {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 239 240 /* non-uniform should have at least two entries */ 241 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 242 {error_msg_fatal("grop() :: non_uniform and n=0,1?");} 243 244 /* check to make sure comm package has been initialized */ 245 if (!p_init) 246 {comm_init();} 247 #endif 248 249 /* if there's nothing to do return */ 250 if ((num_nodes<2)||(!n)) 251 {return;} 252 253 /* a negative number of items to send ==> fatal */ 254 if (n<0) 255 {error_msg_fatal("gdop() :: n=%D<0?",n);} 256 257 /* advance to list of n operations for custom */ 258 if ((type=oprs[0])==NON_UNIFORM) 259 {oprs++;} 260 261 if (!(fp = (vfp) rvec_fct_addr(type))) { 262 error_msg_warning("grop() :: hope you passed in a rbfp!\n"); 263 fp = (vfp) oprs; 264 } 265 266 /* all msgs will be of the same length */ 267 /* if not a hypercube must colapse partial dim */ 268 if (edge_not_pow_2) 269 { 270 if (my_id >= floor_num_nodes) 271 {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id, 272 MPI_COMM_WORLD);} 273 else 274 { 275 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 276 MPI_COMM_WORLD,&status); 277 (*fp)(vals,work,n,oprs); 278 } 279 } 280 281 /* implement the mesh fan in/out exchange algorithm */ 282 if (my_id<floor_num_nodes) 283 { 284 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 285 { 286 dest = my_id^mask; 287 if (my_id > dest) 288 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 289 else 290 { 291 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 292 MPI_COMM_WORLD, &status); 293 (*fp)(vals, work, n, oprs); 294 } 295 } 296 297 mask=floor_num_nodes>>1; 298 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 299 { 300 if (my_id%mask) 301 {continue;} 302 303 dest = my_id^mask; 304 if (my_id < dest) 305 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 306 else 307 { 308 MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest, 309 MPI_COMM_WORLD, &status); 310 } 311 } 312 } 313 314 /* if not a hypercube must expand to partial dim */ 315 if (edge_not_pow_2) 316 { 317 if (my_id >= floor_num_nodes) 318 { 319 MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 320 MPI_COMM_WORLD,&status); 321 } 322 else 323 {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id, 324 MPI_COMM_WORLD);} 325 } 326 } 327 328 329 /***********************************comm.c************************************* 330 Function: grop() 331 332 Input : 333 Output: 334 Return: 335 Description: fan-in/out version 336 337 note good only for num_nodes=2^k!!! 338 339 ***********************************comm.c*************************************/ 340 void 341 grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim) 342 { 343 register int mask, edge; 344 int type, dest; 345 vfp fp; 346 MPI_Status status; 347 348 #ifdef SAFE 349 /* ok ... should have some data, work, and operator(s) */ 350 if (!vals||!work||!oprs) 351 {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 352 353 /* non-uniform should have at least two entries */ 354 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 355 {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 356 357 /* check to make sure comm package has been initialized */ 358 if (!p_init) 359 {comm_init();} 360 #endif 361 362 /* if there's nothing to do return */ 363 if ((num_nodes<2)||(!n)||(dim<=0)) 364 {return;} 365 366 /* the error msg says it all!!! */ 367 if (modfl_num_nodes) 368 {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 369 370 /* a negative number of items to send ==> fatal */ 371 if (n<0) 372 {error_msg_fatal("grop_hc() :: n=%D<0?",n);} 373 374 /* can't do more dimensions then exist */ 375 dim = PetscMin(dim,i_log2_num_nodes); 376 377 /* advance to list of n operations for custom */ 378 if ((type=oprs[0])==NON_UNIFORM) 379 {oprs++;} 380 381 if (!(fp = (vfp) rvec_fct_addr(type))) { 382 error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 383 fp = (vfp) oprs; 384 } 385 386 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 387 { 388 dest = my_id^mask; 389 if (my_id > dest) 390 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 391 else 392 { 393 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 394 &status); 395 (*fp)(vals, work, n, oprs); 396 } 397 } 398 399 if (edge==dim) 400 {mask>>=1;} 401 else 402 {while (++edge<dim) {mask<<=1;}} 403 404 for (edge=0; edge<dim; edge++,mask>>=1) 405 { 406 if (my_id%mask) 407 {continue;} 408 409 dest = my_id^mask; 410 if (my_id < dest) 411 {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 412 else 413 { 414 MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 415 &status); 416 } 417 } 418 } 419 420 421 /***********************************comm.c************************************* 422 Function: gop() 423 424 Input : 425 Output: 426 Return: 427 Description: fan-in/out version 428 ***********************************comm.c*************************************/ 429 void gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type) 430 { 431 register int mask, edge; 432 int dest; 433 MPI_Status status; 434 MPI_Op op; 435 436 #ifdef SAFE 437 /* check to make sure comm package has been initialized */ 438 if (!p_init) 439 {comm_init();} 440 441 /* ok ... should have some data, work, and operator(s) */ 442 if (!vals||!work||!fp) 443 {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);} 444 #endif 445 446 /* if there's nothing to do return */ 447 if ((num_nodes<2)||(!n)) 448 {return;} 449 450 /* a negative number of items to send ==> fatal */ 451 if (n<0) 452 {error_msg_fatal("gop() :: n=%D<0?",n);} 453 454 if (comm_type==MPI) 455 { 456 MPI_Op_create(fp,TRUE,&op); 457 MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD); 458 MPI_Op_free(&op); 459 return; 460 } 461 462 /* if not a hypercube must colapse partial dim */ 463 if (edge_not_pow_2) 464 { 465 if (my_id >= floor_num_nodes) 466 {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id, 467 MPI_COMM_WORLD);} 468 else 469 { 470 MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2, 471 MPI_COMM_WORLD,&status); 472 (*fp)(vals,work,&n,&dt); 473 } 474 } 475 476 /* implement the mesh fan in/out exchange algorithm */ 477 if (my_id<floor_num_nodes) 478 { 479 for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1) 480 { 481 dest = my_id^mask; 482 if (my_id > dest) 483 {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 484 else 485 { 486 MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest, 487 MPI_COMM_WORLD, &status); 488 (*fp)(vals, work, &n, &dt); 489 } 490 } 491 492 mask=floor_num_nodes>>1; 493 for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1) 494 { 495 if (my_id%mask) 496 {continue;} 497 498 dest = my_id^mask; 499 if (my_id < dest) 500 {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 501 else 502 { 503 MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest, 504 MPI_COMM_WORLD, &status); 505 } 506 } 507 } 508 509 /* if not a hypercube must expand to partial dim */ 510 if (edge_not_pow_2) 511 { 512 if (my_id >= floor_num_nodes) 513 { 514 MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2, 515 MPI_COMM_WORLD,&status); 516 } 517 else 518 {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id, 519 MPI_COMM_WORLD);} 520 } 521 } 522 523 524 525 526 527 528 /****************************************************************************** 529 Function: giop() 530 531 Input : 532 Output: 533 Return: 534 Description: 535 536 ii+1 entries in seg :: 0 .. ii 537 538 ******************************************************************************/ 539 void 540 ssgl_radd(register REAL *vals, register REAL *work, register int level, 541 register int *segs) 542 { 543 register int edge, type, dest, mask; 544 register int stage_n; 545 MPI_Status status; 546 547 #ifdef DEBUG 548 if (level > i_log2_num_nodes) 549 {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 550 #endif 551 552 #ifdef SAFE 553 /* check to make sure comm package has been initialized */ 554 if (!p_init) 555 {comm_init();} 556 #endif 557 558 559 /* all msgs are *NOT* the same length */ 560 /* implement the mesh fan in/out exchange algorithm */ 561 for (mask=0, edge=0; edge<level; edge++, mask++) 562 { 563 stage_n = (segs[level] - segs[edge]); 564 if (stage_n && !(my_id & mask)) 565 { 566 dest = edge_node[edge]; 567 type = MSGTAG3 + my_id + (num_nodes*edge); 568 if (my_id>dest) 569 /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 570 {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 571 MPI_COMM_WORLD);} 572 else 573 { 574 type = type - my_id + dest; 575 /* crecv(type,work,stage_n*REAL_LEN); */ 576 MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 577 MPI_COMM_WORLD,&status); 578 rvec_add(vals+segs[edge], work, stage_n); 579 /* daxpy(vals+segs[edge], work, stage_n); */ 580 } 581 } 582 mask <<= 1; 583 } 584 mask>>=1; 585 for (edge=0; edge<level; edge++) 586 { 587 stage_n = (segs[level] - segs[level-1-edge]); 588 if (stage_n && !(my_id & mask)) 589 { 590 dest = edge_node[level-edge-1]; 591 type = MSGTAG6 + my_id + (num_nodes*edge); 592 if (my_id<dest) 593 /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 594 {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 595 MPI_COMM_WORLD);} 596 else 597 { 598 type = type - my_id + dest; 599 /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 600 MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 601 MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status); 602 } 603 } 604 mask >>= 1; 605 } 606 } 607 608 609 610 /***********************************comm.c************************************* 611 Function: grop_hc_vvl() 612 613 Input : 614 Output: 615 Return: 616 Description: fan-in/out version 617 618 note good only for num_nodes=2^k!!! 619 620 ***********************************comm.c*************************************/ 621 void 622 grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim) 623 { 624 register int mask, edge, n; 625 int type, dest; 626 vfp fp; 627 MPI_Status status; 628 629 error_msg_fatal("grop_hc_vvl() :: is not working!\n"); 630 631 #ifdef SAFE 632 /* ok ... should have some data, work, and operator(s) */ 633 if (!vals||!work||!oprs||!segs) 634 {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 635 636 /* non-uniform should have at least two entries */ 637 #if defined(not_used) 638 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 639 {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");} 640 #endif 641 642 /* check to make sure comm package has been initialized */ 643 if (!p_init) 644 {comm_init();} 645 #endif 646 647 /* if there's nothing to do return */ 648 if ((num_nodes<2)||(dim<=0)) 649 {return;} 650 651 /* the error msg says it all!!! */ 652 if (modfl_num_nodes) 653 {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");} 654 655 /* can't do more dimensions then exist */ 656 dim = PetscMin(dim,i_log2_num_nodes); 657 658 /* advance to list of n operations for custom */ 659 if ((type=oprs[0])==NON_UNIFORM) 660 {oprs++;} 661 662 if (!(fp = (vfp) rvec_fct_addr(type))){ 663 error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n"); 664 fp = (vfp) oprs; 665 } 666 667 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 668 { 669 n = segs[dim]-segs[edge]; 670 dest = my_id^mask; 671 if (my_id > dest) 672 {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 673 else 674 { 675 MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest, 676 MPI_COMM_WORLD, &status); 677 (*fp)(vals+segs[edge], work, n, oprs); 678 } 679 } 680 681 if (edge==dim) 682 {mask>>=1;} 683 else 684 {while (++edge<dim) {mask<<=1;}} 685 686 for (edge=0; edge<dim; edge++,mask>>=1) 687 { 688 if (my_id%mask) 689 {continue;} 690 691 n = (segs[dim]-segs[dim-1-edge]); 692 693 dest = my_id^mask; 694 if (my_id < dest) 695 {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id, 696 MPI_COMM_WORLD);} 697 else 698 { 699 MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE, 700 MSGTAG4+dest,MPI_COMM_WORLD, &status); 701 } 702 } 703 } 704 705 /****************************************************************************** 706 Function: giop() 707 708 Input : 709 Output: 710 Return: 711 Description: 712 713 ii+1 entries in seg :: 0 .. ii 714 715 ******************************************************************************/ 716 void new_ssgl_radd(register REAL *vals, register REAL *work, register int level,register int *segs) 717 { 718 register int edge, type, dest, mask; 719 register int stage_n; 720 MPI_Status status; 721 722 723 #ifdef DEBUG 724 if (level > i_log2_num_nodes) 725 {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 726 #endif 727 728 #ifdef SAFE 729 /* check to make sure comm package has been initialized */ 730 if (!p_init) 731 {comm_init();} 732 #endif 733 734 /* all msgs are *NOT* the same length */ 735 /* implement the mesh fan in/out exchange algorithm */ 736 for (mask=0, edge=0; edge<level; edge++, mask++) 737 { 738 stage_n = (segs[level] - segs[edge]); 739 if (stage_n && !(my_id & mask)) 740 { 741 dest = edge_node[edge]; 742 type = MSGTAG3 + my_id + (num_nodes*edge); 743 if (my_id>dest) 744 /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 745 {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 746 MPI_COMM_WORLD);} 747 else 748 { 749 type = type - my_id + dest; 750 /* crecv(type,work,stage_n*REAL_LEN); */ 751 MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 752 MPI_COMM_WORLD,&status); 753 rvec_add(vals+segs[edge], work, stage_n); 754 /* daxpy(vals+segs[edge], work, stage_n); */ 755 } 756 } 757 mask <<= 1; 758 } 759 mask>>=1; 760 for (edge=0; edge<level; edge++) 761 { 762 stage_n = (segs[level] - segs[level-1-edge]); 763 if (stage_n && !(my_id & mask)) 764 { 765 dest = edge_node[level-edge-1]; 766 type = MSGTAG6 + my_id + (num_nodes*edge); 767 if (my_id<dest) 768 /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 769 {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 770 MPI_COMM_WORLD);} 771 else 772 { 773 type = type - my_id + dest; 774 /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 775 MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 776 MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status); 777 } 778 } 779 mask >>= 1; 780 } 781 } 782 783 784 785 /***********************************comm.c************************************* 786 Function: giop() 787 788 Input : 789 Output: 790 Return: 791 Description: fan-in/out version 792 793 note good only for num_nodes=2^k!!! 794 795 ***********************************comm.c*************************************/ 796 void giop_hc(int *vals, int *work, int n, int *oprs, int dim) 797 { 798 register int mask, edge; 799 int type, dest; 800 vfp fp; 801 MPI_Status status; 802 803 #ifdef SAFE 804 /* ok ... should have some data, work, and operator(s) */ 805 if (!vals||!work||!oprs) 806 {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);} 807 808 /* non-uniform should have at least two entries */ 809 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 810 {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");} 811 812 /* check to make sure comm package has been initialized */ 813 if (!p_init) 814 {comm_init();} 815 #endif 816 817 /* if there's nothing to do return */ 818 if ((num_nodes<2)||(!n)||(dim<=0)) 819 {return;} 820 821 /* the error msg says it all!!! */ 822 if (modfl_num_nodes) 823 {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");} 824 825 /* a negative number of items to send ==> fatal */ 826 if (n<0) 827 {error_msg_fatal("giop_hc() :: n=%D<0?",n);} 828 829 /* can't do more dimensions then exist */ 830 dim = PetscMin(dim,i_log2_num_nodes); 831 832 /* advance to list of n operations for custom */ 833 if ((type=oprs[0])==NON_UNIFORM) 834 {oprs++;} 835 836 if (!(fp = (vfp) ivec_fct_addr(type))){ 837 error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n"); 838 fp = (vfp) oprs; 839 } 840 841 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 842 { 843 dest = my_id^mask; 844 if (my_id > dest) 845 {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 846 else 847 { 848 MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 849 &status); 850 (*fp)(vals, work, n, oprs); 851 } 852 } 853 854 if (edge==dim) 855 {mask>>=1;} 856 else 857 {while (++edge<dim) {mask<<=1;}} 858 859 for (edge=0; edge<dim; edge++,mask>>=1) 860 { 861 if (my_id%mask) 862 {continue;} 863 864 dest = my_id^mask; 865 if (my_id < dest) 866 {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 867 else 868 { 869 MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 870 &status); 871 } 872 } 873 } 874