#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Fan-in/fan-out global reduction routines (giop, grop, gfop, ...) built on
MPI point-to-point messages, used by the tfs package.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description: determine the largest hypercube embedded in MPI_COMM_WORLD and
the pairing used to fold any leftover nodes into it
***********************************comm.c*************************************/
void
comm_init (void)
{

  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
}
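
/* Worked example (for illustration): with num_nodes = 6 the largest
   embedded hypercube has floor_num_nodes = 4 and i_log2_num_nodes = 2,
   leaving modfl_num_nodes = 2 nodes (4 and 5) outside the cube.  The
   edge_not_pow_2 values computed above pair them off with cube nodes:

     node 1 <--> node 4   ((1|4)-1 = 4 on node 1, (4^4)+1 = 1 on node 4)
     node 2 <--> node 5   ((2|4)-1 = 5 on node 2, (5^4)+1 = 2 on node 5)

   Every reduction below first collapses nodes 4,5 onto 1,2, runs the
   hypercube exchange on nodes 0..3, and finally expands back out.    */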

/***********************************comm.c*************************************
Function: giop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {
      return;
    }

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                              MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                              MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
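
/* Usage sketch (illustrative only; the caller below is hypothetical and
   GL_ADD is assumed to be one of the uniform op codes from the tfs
   headers): every node contributes two ints and, on return, holds the
   elementwise global sums.                                            */
#if 0
static void example_giop_sum(void)
{
  int vals[2], work[2];
  int op[] = {GL_ADD};          /* one uniform op applied to all entries */

  comm_init();
  vals[0] = my_id;              /* becomes 0+1+...+(num_nodes-1) */
  vals[1] = 1;                  /* becomes num_nodes             */
  giop(vals,work,2,op);
}
#endif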

/***********************************comm.c*************************************
Function: grop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                              MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                              MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                         MPI_COMM_WORLD);}
    }
}
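
/* Usage sketch for the NON_UNIFORM path (illustrative only; assumes the
   GL_ADD/GL_MAX op codes from the tfs headers and that rvec_fct_addr()
   maps NON_UNIFORM to a per-entry handler, as the oprs++ above
   suggests): after the leading NON_UNIFORM, n entries take n op codes,
   so here the first entry is summed and the second maxed.             */
#if 0
static void example_grop_mixed(PetscScalar x, PetscScalar y)
{
  PetscScalar vals[2], work[2];
  int op[] = {NON_UNIFORM, GL_ADD, GL_MAX};

  comm_init();
  vals[0] = x;                  /* globally summed */
  vals[1] = y;                  /* globally maxed  */
  grop(vals,work,2,op);
}
#endif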

/***********************************comm.c*************************************
Function: grop_hc()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                          &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                          &status);
        }
    }
}
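
/* Example (for illustration): with num_nodes = 8, a call with dim = 2
   exchanges on masks 1 and 2 only, so the reduction runs independently
   within the two 4-node sub-cubes {0,1,2,3} and {4,5,6,7}; dim = 3
   would give the full global reduction.                               */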

/***********************************comm.c*************************************
Function: gfop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;
  int ierr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      ierr = MPI_Op_create(fp,TRUE,&op);
      ierr = MPI_Allreduce(vals, work, n, dt, op, MPI_COMM_WORLD);
      ierr = MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                              MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                              MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                          MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                         MPI_COMM_WORLD);}
    }
}
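
/* Caution (observation on the code above): the two branches disagree
   about fp's calling convention and where the result lands.  The
   comm_type==MPI branch hands fp to MPI_Op_create(), i.e. expects
   MPI_User_function (in,inout) semantics, and MPI_Allreduce() leaves
   the reduced values in work; the fan-in/out path appears to apply fp
   in the giop()/grop() style (received data as the second argument,
   result accumulated into the first), leaving the result in vals.  A
   caller sketch for the MPI branch, using a hypothetical MPI-style
   reduction function my_add():                                        */
#if 0
static void my_add(void *in, void *inout, int *n, MPI_Datatype *dt)
{
  int i, *a = (int*)in, *b = (int*)inout;
  for (i=0; i<*n; i++) {b[i] += a[i];}
}

static void example_gfop_sum(void)
{
  int vals[4] = {1,2,3,4}, work[4];

  comm_init();
  gfop(vals,work,4,(vbfp)my_add,MPI_INT,MPI);   /* sums land in work */
}
#endif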

/******************************************************************************
Function: ssgl_radd()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void
ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  int ierr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                             MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                              MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                             MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                              MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
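
/* Worked example (for illustration): with level = 2 and segs = {0,1,3}
   (level+1 entries, per the header comment), stage edge=0 exchanges
   segs[2]-segs[0] = 3 scalars starting at vals+0 and stage edge=1
   exchanges segs[2]-segs[1] = 2 scalars starting at vals+1.  Stage
   `edge' is executed only by nodes whose low `edge' id bits are zero
   (the mask above grows as 0,1,3,7,...), each pairing with
   edge_node[edge] across hypercube dimension edge.                    */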

/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                          MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                         MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                          MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void new_ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;
  int ierr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                             MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                              MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                             MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                              MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}

/***********************************comm.c*************************************
Function: giop_hc()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;
  int ierr;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                          &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                          &status);
        }
    }
}
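
/* Usage sketch (illustrative only; assumes the GL_MAX op code from the
   tfs headers): num_nodes must be a power of 2, and a dim-limited call
   leaves each 2^dim-node sub-cube holding its own local result.       */
#if 0
static void example_giop_hc_max(void)
{
  int vals[1], work[1];
  int op[] = {GL_MAX};

  comm_init();
  vals[0] = my_id;
  giop_hc(vals,work,1,op,1);  /* pairwise max: nodes 2k,2k+1 both end with 2k+1 */
}
#endif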