#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------

***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description:
***********************************comm.c*************************************/
void
comm_init (void)
{

  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}

}
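/*
   Worked example (illustrative only, not from the original source): with
   num_nodes = 6 the loop above gives floor_num_nodes = 4 (largest power of
   2 <= 6), i_log2_num_nodes = 2, and modfl_num_nodes = 2.  edge_node[e] is
   this rank's partner across hypercube dimension e (my_id ^ 2^e).  Ranks 4
   and 5 fall outside the embedded hypercube and are paired via
   edge_not_pow_2:

     rank 1 -> (1|4)-1 = 4        rank 4 -> (4^4)+1 = 1
     rank 2 -> (2|4)-1 = 5        rank 5 -> (5^4)+1 = 2

   so the two "extra" ranks fold their data onto ranks 1 and 2 before the
   hypercube exchanges and receive the result back afterwards; ranks 0 and 3
   keep edge_not_pow_2 = 0.
*/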
/***********************************comm.c*************************************
Function: giop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
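/* Minimal usage sketch (illustrative; the TFS_COMM_EXAMPLES guard is
   hypothetical and keeps this out of the build).  A global integer sum,
   assuming the GL_ADD opcode from the tfs headers; `work` is rank-local
   scratch of the same length as `vals`. */
#ifdef TFS_COMM_EXAMPLES
static void example_giop_sum(void)
{
  int vals[1], work[1];
  int op[1] = {GL_ADD};      /* uniform op: one opcode for all entries */

  comm_init();
  vals[0] = 1;               /* this rank's contribution */
  giop(vals, work, 1, op);   /* on return every rank has vals[0] == num_nodes */
}
#endif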
/***********************************comm.c*************************************
Function: grop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
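/* Companion sketch for the scalar variant (illustrative; same hypothetical
   TFS_COMM_EXAMPLES guard).  GL_MAX is assumed to be among the opcodes
   understood by rvec_fct_addr(); passing NON_UNIFORM in oprs[0] followed by
   one opcode per entry selects a different operation for each entry instead. */
#ifdef TFS_COMM_EXAMPLES
static void example_grop_max(void)
{
  PetscScalar vals[1], work[1];
  int op[1] = {GL_MAX};

  comm_init();
  vals[0] = (PetscScalar)my_id;
  grop(vals, work, 1, op);   /* every rank ends with the global max, num_nodes-1 */
}
#endif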
/***********************************comm.c*************************************
Function: grop_hc()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
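/* Sketch of the subcube restriction (illustrative; hypothetical
   TFS_COMM_EXAMPLES guard, GL_ADD opcode assumed).  With dim == 2 the
   reduction stays within 2-dimensional subcubes, i.e. rank groups
   {0,1,2,3}, {4,5,6,7}, ...; num_nodes must be a power of 2. */
#ifdef TFS_COMM_EXAMPLES
static void example_grop_hc(void)
{
  PetscScalar vals[1], work[1];
  int op[1] = {GL_ADD};

  comm_init();
  vals[0] = 1.0;
  grop_hc(vals, work, 1, op, 2);   /* each rank in a group of 4 ends with 4.0 */
}
#endif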
/***********************************comm.c*************************************
Function: gfop()

Input :
Output:
Return:
Description: fan-in/out version
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status status;
  MPI_Op op;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      MPI_Op_create(fp,TRUE,&op);
      MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
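/* Sketch of a user-supplied combine function (illustrative; hypothetical
   TFS_COMM_EXAMPLES guard).  Note the convention mismatch in gfop(): the
   comm_type == MPI branch treats fp as an MPI_User_function and returns the
   result in `work` (the MPI_Allreduce recvbuf), while the hand-rolled
   fan-in/out branch calls fp(vals, work, &n, &dt) and keeps the result in
   `vals`.  The sum below follows the MPI convention, so it is only shown
   with comm_type == MPI. */
#ifdef TFS_COMM_EXAMPLES
static void example_int_sum(void *in, void *inout, int *len, MPI_Datatype *dt)
{
  int i, *a = (int*)in, *b = (int*)inout;
  for (i=0; i<*len; i++) {b[i] += a[i];}   /* MPI convention: result in inout */
}

static void example_gfop(void)
{
  int vals[1] = {1}, work[1];

  comm_init();
  gfop(vals, work, 1, (vbfp)example_int_sum, MPI_INT, MPI);
  /* result (num_nodes) is in work[0] on every rank for the MPI path */
}
#endif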
/******************************************************************************
Function: ssgl_radd()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void
ssgl_radd( PetscScalar *vals, PetscScalar *work, int level,
           int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
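/* Sketch of the staged segmented add (illustrative; hypothetical
   TFS_COMM_EXAMPLES guard).  segs holds level+1 offsets with segs[level]
   the total length: stage e combines the tail vals[segs[e] .. segs[level])
   across hypercube dimension e, so later stages touch ever-shorter tails.
   Assumes num_nodes is a power of 2 with level <= i_log2_num_nodes. */
#ifdef TFS_COMM_EXAMPLES
static void example_ssgl_radd(void)
{
  PetscScalar vals[4] = {1.0, 1.0, 1.0, 1.0};
  PetscScalar work[4];                /* scratch: at least segs[level]-segs[0] */
  int segs[3] = {0, 2, 4};            /* level = 2, total length 4 */

  comm_init();
  ssgl_radd(vals, work, 2, segs);
}
#endif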
/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status status;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input :
Output:
Return:
Description:

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void new_ssgl_radd( PetscScalar *vals, PetscScalar *work, int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status status;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
/***********************************comm.c*************************************
Function: giop_hc()

Input :
Output:
Return:
Description: fan-in/out version

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* int buffers, so use MPI_INT to match (as in giop() above) */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
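/* Sketch of the integer subcube variant (illustrative; hypothetical
   TFS_COMM_EXAMPLES guard, GL_ADD opcode assumed).  dim == 1 restricts the
   reduction to neighbor pairs across the lowest hypercube dimension. */
#ifdef TFS_COMM_EXAMPLES
static void example_giop_hc(void)
{
  int vals[1] = {1}, work[1];
  int op[1] = {GL_ADD};

  comm_init();
  giop_hc(vals, work, 1, op, 1);   /* ranks 2k and 2k+1 each end with 2 */
}
#endif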