/***********************************comm.c*************************************
SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue

Author: Henry M. Tufo III
e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification: 11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Communication layer for the gather-scatter package.  Provides vector
all-reduce operations (integer, REAL, and generic datatype flavors) plus
staged partial reductions, implemented twice: once on Intel NX primitives
(csend/crecv/isend/irecv) and once on MPI.  All collectives use a
fan-in-to-node-0 / fan-out-broadcast exchange over a hypercube embedding,
with an extra collapse/expand step when num_nodes is not a power of two.
***********************************comm.c*************************************/
#include <stdio.h>
#include "petsc.h"

#if defined NXSRC
#ifndef DELTA
#include <nx.h>
#endif

#elif defined MPISRC
#include <mpi.h>

#endif


#include "const.h"
#include "types.h"
#include "error.h"
#include "ivec.h"
#include "bss_malloc.h"
#include "comm.h"


/* global program control variables - explicitly exported */
int my_id = 0;             /* this process' rank                              */
int num_nodes = 1;         /* total number of processes                       */
int floor_num_nodes = 0;   /* largest power of two <= num_nodes               */
int i_log2_num_nodes = 0;  /* log2(floor_num_nodes)                           */

/* global program control variables (file-private) */
static int p_init = 0;           /* nonzero once comm_init() has run          */
static int modfl_num_nodes;      /* num_nodes - floor_num_nodes               */
static int edge_not_pow_2;       /* partner id (+/-1 encoded) for the extra
                                    collapse/expand step when num_nodes is
                                    not a power of 2; 0 if not involved       */

/* edge_node[k] = my_id with bit k flipped, i.e. the hypercube neighbor
   across dimension k (filled in by comm_init()) */
static unsigned int edge_node[INT_LEN*32];

static void sgl_iadd(int *vals, int level);
static void sgl_radd(double *vals, int level);
static void hmt_concat(REAL *vals, REAL *work, int size);


/***********************************comm.c*************************************
Function: comm_init()

Input : none
Output: sets my_id, num_nodes, floor_num_nodes, i_log2_num_nodes,
        modfl_num_nodes, edge_node[], edge_not_pow_2
Return: void
Description: one-shot initialization of the communication package.  Queries
  the MPI communicator for rank/size and precomputes the hypercube embedding
  used by every collective in this file.  Safe to call repeatedly; only the
  first call does work.
***********************************comm.c*************************************/
void
comm_init (void)
{
#ifdef DEBUG
  error_msg_warning("c_init() :: start\n");
#endif

  /* idempotent: only the first caller initializes */
  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  /* message tags are built as BASE + my_id (+ num_nodes*edge); cap the
     process count so tag arithmetic cannot overflow an int */
  if (num_nodes> (INT_MAX >> 1))
    {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}

  ivec_zero((int*)edge_node,INT_LEN*32);

  /* compute floor_num_nodes = largest power of 2 <= num_nodes and record
     this node's neighbor across each hypercube dimension */
  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  /* loop overshoots by one dimension; back off */
  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  /* pair each "excess" node (id >= floor_num_nodes) with a low node so the
     collectives can collapse onto, then expand from, the power-of-2 core.
     The stored value is offset by +/-1 so that 0 can mean "not paired".    */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2=((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}

#ifdef DEBUG
  error_msg_warning("c_init() done :: my_id=%d, num_nodes=%d",my_id,num_nodes);
#endif
}



/***********************************comm.c*************************************
Function: giop()

Input : vals - n integers to reduce (in place); work - scratch, n ints;
        n - vector length; oprs - operation code(s); oprs[0]==NON_UNIFORM
        means a per-entry list follows (or oprs doubles as a callback).
Output: vals holds the reduction result on every node
Return: void
Description: vector integer all-reduce, fan-in/out version.  Fan-in combines
  partial results toward node 0, fan-out broadcasts node 0's result back.
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
#if defined MPISRC
  MPI_Status status;
#elif defined NXSRC
  int len;
#endif


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {
#ifdef DEBUG
      error_msg_warning("giop() :: n=%d num_nodes=%d",n,num_nodes);
#endif
      return;
    }

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%d<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack: if the op code isn't a known ivec function, the
     caller is assumed to have passed a function pointer in oprs itself */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

#if defined NXSRC
  /* all msgs will be of the same length (NX lengths are in bytes) */
  len = n*INT_LEN;

  /* if not a hypercube must collapse partial dim: excess nodes ship their
     data to their partner inside the power-of-2 core */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
      else
        {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in: after sending once this node is done (break) */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
          else
            {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
        }

      /* fan-out: broadcast node 0's result down the tree */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
          else
            {crecv(MSGTAG4+dest,(char *)vals,len);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
      else
        {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
    }

#elif defined MPISRC
  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* NOTE(review): unlike the NX path above, there is no `break` after
         the send here, so a node keeps exchanging after it has forwarded
         its partial result.  Tags still pair up and node 0 still ends with
         the full reduction (the fan-out overwrites everyone), but this
         looks like extra traffic relative to the NX version -- confirm
         intent before "fixing". */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      /* fan-out: broadcast node 0's result down the tree */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
#else
  return;
#endif
}

/***********************************comm.c*************************************
Function: grop()
Input : vals - n REALs to reduce (in place); work - scratch, n REALs;
        n - vector length; oprs - operation code(s), NON_UNIFORM semantics
        as in giop().
Output: vals holds the reduction result on every node
Return: void
Description: vector REAL all-reduce, fan-in/out version (REAL twin of giop).
***********************************comm.c*************************************/
void
grop(REAL *vals, REAL *work, int n, int *oprs)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
#if defined MPISRC
  MPI_Status status;
#elif defined NXSRC
  int len;
#endif


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  /* NOTE(review): the message says "gdop()" -- stale copy/paste of the
     function name in the string; left as-is (runtime string). */
  if (n<0)
    {error_msg_fatal("gdop() :: n=%d<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* if the op code isn't a known rvec function, oprs is itself a callback */
  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

#if defined NXSRC
  /* all msgs will be of the same length (bytes) */
  len = n*REAL_LEN;

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
      else
        {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in toward node 0 */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
          else
            {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
        }

      /* fan-out broadcast of the result */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
          else
            {crecv(MSGTAG4+dest,(char *)vals,len);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
      else
        {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
    }

#elif defined MPISRC
  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan-in (no `break` after the send -- same remark as in giop()) */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      /* fan-out broadcast of the result */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
#else
  return;
#endif
}


/***********************************comm.c*************************************
Function: grop_hc()

Input : vals/work/n/oprs as in grop(); dim - number of hypercube dimensions
        to reduce over (clamped to i_log2_num_nodes)
Output: vals holds the reduction result within the dim-dimensional subcube
Return: void
Description: REAL all-reduce restricted to the first dim hypercube
  dimensions, fan-in/out version.

  note good only for num_nodes=2^k!!!
***********************************comm.c*************************************/
void
grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
#if defined MPISRC
  MPI_Status status;
#elif defined NXSRC
  int len;
#endif


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%d<0?",n);}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

#if defined NXSRC
  /* all msgs will be of the same length (bytes) */
  len = n*REAL_LEN;

  /* implement the mesh fan in/out exchange algorithm (fan-in) */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
      else
        {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
    }

  /* position mask at the top dimension for the fan-out, accounting for an
     early break out of the fan-in loop */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
      else
        {crecv(MSGTAG4+dest,(char *)vals,len);}
    }

#elif defined MPISRC
  /* fan-in (no break on send; see remark in giop()) */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* reposition mask for fan-out (the else-branch only fires after an early
     break, which the MPI fan-in above never takes) */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
#else
  return;
#endif
}


/***********************************comm.c*************************************
Function: gfop()

Input : vals - n items of datatype dt to reduce; work - scratch;
        n - count; fp - combining callback; dt - datatype code;
        comm_type - MPI selects the native MPI_Allreduce path
Output: reduction result (see NOTE below on which buffer holds it)
Return: void
Description: generic-datatype all-reduce, fan-in/out version.
***********************************comm.c*************************************/
void
gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
{
  register int mask, edge;
  int dest;
#if defined MPISRC
  MPI_Status status;
  MPI_Op op;
#elif defined NXSRC
  int len;
#endif


#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gop() :: v=%d, w=%d, f=%d",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gop() :: n=%d<0?",n);}

#if defined NXSRC
  /* byte length depends on the datatype */
  switch (dt) {
  case REAL_TYPE:
    len = n*REAL_LEN;
    break;
  case INT_TYPE:
    len = n*INT_LEN;
    break;
  default:
    error_msg_fatal("gop() :: unrecognized datatype!!!\n");
  }

  /* NOTE(review): this NX path invokes (*fp)(vals,work,n,dt) with values,
     while the MPI path below uses MPI_User_function style (&n,&dt) --
     the two paths assume different fp signatures; verify vbfp's contract. */

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
      else
        {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,dt);}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
          else
            {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals,work,n,dt);}
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
          else
            {crecv(MSGTAG4+dest,(char *)vals,len);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
      else
        {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
    }

#elif defined MPISRC

  /* native path: wrap fp as an MPI_Op and let MPI do the all-reduce.
     NOTE(review): this leaves the result in `work`, whereas the hand-rolled
     path below leaves it in `vals` -- callers must know which; confirm this
     asymmetry is intentional. */
  if (comm_type==MPI)
    {
      MPI_Op_create(fp,TRUE,&op);
      MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
#else
  return;
#endif
}






/******************************************************************************
Function: ssgl_radd()

Input : vals - segmented REAL vector; work - scratch (at least the largest
        stage length); level - number of stages (<= i_log2_num_nodes);
        segs - level+1 segment offsets (entries 0 .. level)
Output: vals updated with the staged partial sums
Return: void
Description: staged segmented global REAL add.  Stage `edge` combines only
  the tail vals[segs[edge] .. segs[level]) across the dimension-`edge`
  hypercube edge; messages shrink as the fan-in proceeds, and the fan-out
  restores the combined tails.  Note ii+1 entries in seg :: 0 .. ii.
******************************************************************************/
void
ssgl_radd(register REAL *vals, register REAL *work, register int level,
          register int *segs)
{
  register int edge, type, dest, mask;
  register int stage_n;
#if defined MPISRC
  MPI_Status status;
#endif


#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


#if defined NXSRC
  /* all msgs are *NOT* the same length */
  /* fan-in: after stage edge, mask = 2^(edge+1)-1, so only nodes whose low
     bits are clear keep participating */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);}
          else
            {
              /* match the sender's tag (built from its id) */
              type = type - my_id + dest;
              crecv(type,work,stage_n*REAL_LEN);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  /* fan-out: walk the dimensions back down, shipping the combined tails */
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);}
          else
            {
              type = type - my_id + dest;
              crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN);
            }
        }
      mask >>= 1;
    }

#elif defined MPISRC
  /* all msgs are *NOT* the same length */
  /* fan-in (same staging as the NX path above) */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              /* match the sender's tag (built from its id) */
              type = type - my_id + dest;
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  /* fan-out */
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
#else
  return;
#endif
}



/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input : vals/work/oprs as in grop_hc(); segs - per-dimension segment offsets
Output: vals updated within the dim-dimensional subcube
Return: void
Description: fan-in/out version with variable-length (per-dimension) message
  sizes.  Currently disabled -- aborts on entry (see error_msg_fatal below).

  note good only for num_nodes=2^k!!!
 ***********************************comm.c*************************************/
void
grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
{
  register int mask, edge, n;
  int type, dest;
  vfp fp;
#if defined MPISRC
  MPI_Status status;
#elif defined NXSRC
  int len;
#endif


  /* hard-disabled: everything below is unreachable until this is removed */
  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);}

  /* non-uniform should have at least two entries
     (disabled: n is not yet set at this point) */
#if defined(not_used)
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
#endif

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions then exist */
  dim = MIN(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

#if defined NXSRC
  /* all msgs are *NOT* the same length */
  /* fan-in: stage length shrinks with each dimension */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      len = n*REAL_LEN;
      dest = my_id^mask;
      if (my_id > dest)
        {csend(MSGTAG2+my_id,(char *)vals+segs[edge],len,dest,0); break;}
      else
        {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals+segs[edge], work, n, oprs);}
    }

  /* reposition mask for fan-out (handles early break out of fan-in) */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}
      len = (segs[dim]-segs[dim-1-edge])*REAL_LEN;
      dest = my_id^mask;
      if (my_id < dest)
        {csend(MSGTAG4+my_id,(char *)vals+segs[dim-1-edge],len,dest,0);}
      else
        {crecv(MSGTAG4+dest,(char *)vals+segs[dim-1-edge],len);}
    }

#elif defined MPISRC
  /* fan-in (no break on send; see remark in giop()) */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
#else
  return;
#endif
}


#ifdef INPROG

/***********************************comm.c*************************************
Function: proc_sync()

Input : none
Output: none
Return: void
Description: hc based barrier, work in progress.  NOTE(review): this body
  references vals/work/len/n/fp/oprs which are never declared here -- it
  will not compile if INPROG is ever defined; it is dead scaffolding copied
  from giop().
***********************************comm.c*************************************/
void
proc_sync()
{
  register int mask, edge;
  int type, dest;
#if defined MPISRC
  MPI_Status status;
#endif


#ifdef DEBUG
  error_msg_warning("begin proc_sync()\n");
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

#if defined NXSRC
  /* all msgs will be of zero length */

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {csend(MSGTAG0+my_id,(char *)vals,len,edge_not_pow_2,0);}
      else
        {crecv(MSGTAG0+edge_not_pow_2,(char *)work,len); (*fp)(vals,work,n,oprs);}
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;}
          else
            {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);}
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);}
          else
            {crecv(MSGTAG4+dest,(char *)vals,len);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {crecv(MSGTAG5+edge_not_pow_2,(char *)vals,len);}
      else
        {csend(MSGTAG5+my_id,(char *)vals,len,edge_not_pow_2,0);}
    }

#elif defined MPISRC
  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
#else
  return;
#endif
}
#endif


/* hmt hack: Fortran-callable XOR of two ints.  NOTE(review): the XOR result
   is returned through PetscErrorCode, which normally carries error codes --
   presumably deliberate ("hack"), but worth confirming with callers. */
PetscErrorCode hmt_xor_ (register int *i1, register int *i2)
{
  return(*i1^*i2);
}


/******************************************************************************
Function: staged_gs_()

Input : vals/work/segs as in ssgl_radd(); level passed by reference
Output: vals updated (see ssgl_radd)
Return: void
Description: Fortran-callable wrapper around ssgl_radd().
  ii+1 entries in seg :: 0 .. ii
******************************************************************************/
void
staged_gs_ (register REAL *vals, register REAL *work, register int *level,
            register int *segs)
{
  ssgl_radd(vals, work, *level, segs);
}

/******************************************************************************
Function: staged_iadd_()

Input : gl_num - pointer to the int to reduce; level - by-reference stage
        count
Output: gl_num updated
Return: void
Description: Fortran-callable wrapper around sgl_iadd().
******************************************************************************/
void
staged_iadd_ (int *gl_num, int *level)
{
  sgl_iadd(gl_num,*level);
}



/******************************************************************************
Function: sgl_iadd()

Input : vals - points at a single int to sum; level - number of hypercube
        stages to use
Output: vals[0] holds the staged sum
Return: void
Description: staged scalar integer add over the first `level` hypercube
  dimensions: fan-in accumulates into the low node of each edge, fan-out
  copies the result back.
******************************************************************************/
static void
sgl_iadd(int *vals, int level)
{
  int edge, type, dest, source, len, mask = -1;
  int tmp, *work;


#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs will be of the same length */
  work = &tmp;
  len = INT_LEN;
  /* NOTE(review): len = INT_LEN is a *byte* length for the NX isend/irecv
     calls below, but the MPI branch reuses it as an element *count*
     (MPI_Isend(vals,len,INT_TYPE,...)).  If INT_LEN > 1 that sends/receives
     INT_LEN ints where one is meant -- and the receive target `work` points
     at the single int `tmp`, which would be overrun.  Looks like the count
     should be 1 in the MPI branch; confirm INT_LEN's definition in types.h
     before changing. */

  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level too big?");}

  if (level<=0)
    {return;}

#if defined NXSRC
  {
    long msg_id;
    /* implement the mesh fan in/out exchange algorithm */
    if (my_id<floor_num_nodes)
      {
        /* fan-in: mask grows as 2^edge - 1, filtering out finished nodes */
        mask = 0;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[edge];
                type = MSGTAG1 + my_id + (num_nodes*edge);
                if (my_id > dest)
                  {
                    msg_id = isend(type,vals,len,dest,0);
                    msgwait(msg_id);
                  }
                else
                  {
                    /* match the sender's tag */
                    type = type - my_id + source;
                    msg_id = irecv(type,work,len);
                    msgwait(msg_id);
                    vals[0] += work[0];
                  }
              }
            mask <<= 1;
            mask += 1;
          }
      }

    if (my_id<floor_num_nodes)
      {
        /* fan-out: copy the total back down the edges */
        mask >>= 1;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[level-edge-1];
                type = MSGTAG1 + my_id + (num_nodes*edge);
                if (my_id < dest)
                  {
                    msg_id = isend(type,vals,len,dest,0);
                    msgwait(msg_id);
                  }
                else
                  {
                    type = type - my_id + source;
                    msg_id = irecv(type,work,len);
                    msgwait(msg_id);
                    vals[0] = work[0];
                  }
              }
            mask >>= 1;
          }
      }
  }
#elif defined MPISRC
  {
    MPI_Request msg_id;
    MPI_Status status;

    /* implement the mesh fan in/out exchange algorithm */
    if (my_id<floor_num_nodes)
      {
        mask = 0;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[edge];
                type = MSGTAG1 + my_id + (num_nodes*edge);
                if (my_id > dest)
                  {
                    /* see NOTE above: len used as an element count here */
                    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
                              &msg_id);
                    MPI_Wait(&msg_id,&status);
                  }
                else
                  {
                    type = type - my_id + source;
                    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
                              type,MPI_COMM_WORLD,&msg_id);
                    MPI_Wait(&msg_id,&status);
                    vals[0] += work[0];
                  }
              }
            mask <<= 1;
            mask += 1;
          }
      }

    if (my_id<floor_num_nodes)
      {
        mask >>= 1;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[level-edge-1];
                type = MSGTAG1 + my_id + (num_nodes*edge);
                if (my_id < dest)
                  {
                    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
                              &msg_id);
                    MPI_Wait(&msg_id,&status);
                  }
                else
                  {
                    type = type - my_id + source;
                    MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE,
                              type,MPI_COMM_WORLD,&msg_id);
                    MPI_Wait(&msg_id,&status);
                    vals[0] = work[0];
                  }
              }
            mask >>= 1;
          }
      }
  }
#else
  return;
#endif

}



/******************************************************************************
Function: staged_radd_()

Input : gl_num - pointer to the double to reduce; level - by-reference stage
        count
Output: gl_num updated
Return: void
Description: Fortran-callable wrapper around sgl_radd().
******************************************************************************/
void
staged_radd_ (double *gl_num, int *level)
{
  sgl_radd(gl_num,*level);
}

/******************************************************************************
Function: sgl_radd()

Input : vals - points at a single double to sum; level - stage count
Output: vals[0] holds the staged sum
Return: void
Description: staged scalar double add, double twin of sgl_iadd().
  NOTE(review): the second branch below tests `MRISRC`, which looks like a
  typo for MPISRC -- as written the MPI implementation is never compiled
  and sgl_radd() is a no-op under MPI.  That (dead) branch also passes
  INT_TYPE for double data and reuses the byte length as an element count
  (same issue as sgl_iadd); all three need attention if it is ever enabled.
******************************************************************************/
static void
sgl_radd(double *vals, int level)
{
#if defined NXSRC
  int edge, type, dest, source, len, mask;
  double tmp, *work;
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


#if defined NXSRC
  {
    long msg_id;

    /* all msgs will be of the same length (bytes) */
    work = &tmp;
    len = REAL_LEN;

    if (level > i_log2_num_nodes)
      {error_msg_fatal("sgl_add() :: level too big?");}

    if (level<=0)
      {return;}

    /* implement the mesh fan in/out exchange algorithm */
    if (my_id<floor_num_nodes)
      {
        mask = 0;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[edge];
                type = MSGTAG3 + my_id + (num_nodes*edge);
                if (my_id > dest)
                  {
                    msg_id = isend(type,vals,len,dest,0);
                    msgwait(msg_id);
                  }
                else
                  {
                    type = type - my_id + source;
                    msg_id = irecv(type,work,len);
                    msgwait(msg_id);
                    vals[0] += work[0];
                  }
              }
            mask <<= 1;
            mask += 1;
          }
      }

    if (my_id<floor_num_nodes)
      {
        mask >>= 1;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[level-edge-1];
                type = MSGTAG3 + my_id + (num_nodes*edge);
                if (my_id < dest)
                  {
                    msg_id = isend(type,vals,len,dest,0);
                    msgwait(msg_id);
                  }
                else
                  {
                    type = type - my_id + source;
                    msg_id = irecv(type,work,len);
                    msgwait(msg_id);
                    vals[0] = work[0];
                  }
              }
            mask >>= 1;
          }
      }
  }
#elif defined MRISRC
  {
    MPI_Request msg_id;
    MPI_Status status;

    /* all msgs will be of the same length */
    work = &tmp;
    len = REAL_LEN;

    if (level > i_log2_num_nodes)
      {error_msg_fatal("sgl_add() :: level too big?");}

    if (level<=0)
      {return;}

    /* implement the mesh fan in/out exchange algorithm */
    if (my_id<floor_num_nodes)
      {
        mask = 0;
        for (edge = 0; edge < level; edge++)
          {
            if (!(my_id & mask))
              {
                source = dest = edge_node[edge];
                type = MSGTAG3 + my_id + (num_nodes*edge);
                if (my_id > dest)
                  {
                    MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD,
                              &msg_id);
                    MPI_Wait(&msg_id,&status);
                  }
                else
                  {
                    type = type - my_id + source;
MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1533 type,MPI_COMM_WORLD,&msg_id); 1534 MPI_Wait(&msg_id,&status); 1535 /*msg_id = irecv(type,work,len); 1536 msgwait(msg_id); */ 1537 vals[0] += work[0]; 1538 } 1539 } 1540 mask <<= 1; 1541 mask += 1; 1542 } 1543 } 1544 1545 if (my_id<floor_num_nodes) 1546 { 1547 mask >>= 1; 1548 for (edge = 0; edge < level; edge++) 1549 { 1550 if (!(my_id & mask)) 1551 { 1552 source = dest = edge_node[level-edge-1]; 1553 type = MSGTAG3 + my_id + (num_nodes*edge); 1554 if (my_id < dest) 1555 { 1556 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1557 &msg_id); 1558 MPI_Wait(&msg_id,&status); 1559 /* msg_id = isend(type,vals,len,dest,0); 1560 msgwait(msg_id); */ 1561 } 1562 else 1563 { 1564 type = type - my_id + source; 1565 MPI_Irecv(work,len,INT_TYPE,MPI_ANY_SOURCE, 1566 type,MPI_COMM_WORLD,&msg_id); 1567 MPI_Wait(&msg_id,&status); 1568 /*msg_id = irecv(type,work,len); 1569 msgwait(msg_id); */ 1570 vals[0] = work[0]; 1571 } 1572 } 1573 mask >>= 1; 1574 } 1575 } 1576 } 1577 #else 1578 return; 1579 #endif 1580 } 1581 1582 1583 1584 /****************************************************************************** 1585 Function: giop() 1586 1587 Input : 1588 Output: 1589 Return: 1590 Description: 1591 1592 ii+1 entries in seg :: 0 .. ii 1593 ******************************************************************************/ 1594 void 1595 hmt_concat_ (REAL *vals, REAL *work, int *size) 1596 { 1597 hmt_concat(vals, work, *size); 1598 } 1599 1600 1601 1602 /****************************************************************************** 1603 Function: giop() 1604 1605 Input : 1606 Output: 1607 Return: 1608 Description: 1609 1610 ii+1 entries in seg :: 0 .. 
ii 1611 1612 ******************************************************************************/ 1613 static void 1614 hmt_concat(REAL *vals, REAL *work, int size) 1615 { 1616 #if defined NXSRC 1617 int mask,stage_n; 1618 int edge, type, dest, source, len; 1619 int offset; 1620 double *dptr; 1621 #endif 1622 1623 #ifdef SAFE 1624 /* check to make sure comm package has been initialized */ 1625 if (!p_init) 1626 {comm_init();} 1627 #endif 1628 1629 /* all msgs are *NOT* the same length */ 1630 /* implement the mesh fan in/out exchange algorithm */ 1631 rvec_copy(work,vals,size); 1632 1633 #if defined NXSRC 1634 mask = 0; 1635 stage_n = size; 1636 { 1637 long msg_id; 1638 1639 dptr = work+size; 1640 for (edge = 0; edge < i_log2_num_nodes; edge++) 1641 { 1642 len = stage_n * REAL_LEN; 1643 1644 if (!(my_id & mask)) 1645 { 1646 source = dest = edge_node[edge]; 1647 type = MSGTAG3 + my_id + (num_nodes*edge); 1648 if (my_id > dest) 1649 { 1650 msg_id = isend(type, work, len,dest,0); 1651 msgwait(msg_id); 1652 } 1653 else 1654 { 1655 type = type - my_id + source; 1656 msg_id = irecv(type, dptr,len); 1657 msgwait(msg_id); 1658 } 1659 } 1660 1661 #ifdef DEBUG_1 1662 ierror_msg_warning_n0("stage_n = ",stage_n); 1663 #endif 1664 1665 dptr += stage_n; 1666 stage_n <<=1; 1667 mask <<= 1; 1668 mask += 1; 1669 } 1670 1671 size = stage_n; 1672 stage_n >>=1; 1673 dptr -= stage_n; 1674 1675 mask >>= 1; 1676 1677 for (edge = 0; edge < i_log2_num_nodes; edge++) 1678 { 1679 len = (size-stage_n) * REAL_LEN; 1680 1681 if (!(my_id & mask) && stage_n) 1682 { 1683 source = dest = edge_node[i_log2_num_nodes-edge-1]; 1684 type = MSGTAG6 + my_id + (num_nodes*edge); 1685 if (my_id < dest) 1686 { 1687 msg_id = isend(type,dptr,len,dest,0); 1688 msgwait(msg_id); 1689 } 1690 else 1691 { 1692 type = type - my_id + source; 1693 msg_id = irecv(type,dptr,len); 1694 msgwait(msg_id); 1695 } 1696 } 1697 1698 #ifdef DEBUG_1 1699 ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1700 #endif 1701 1702 
stage_n >>= 1; 1703 dptr -= stage_n; 1704 mask >>= 1; 1705 } 1706 } 1707 #elif defined MRISRC 1708 { 1709 MPI_Request msg_id; 1710 MPI_Status status; 1711 1712 dptr = work+size; 1713 for (edge = 0; edge < i_log2_num_nodes; edge++) 1714 { 1715 len = stage_n * REAL_LEN; 1716 1717 if (!(my_id & mask)) 1718 { 1719 source = dest = edge_node[edge]; 1720 type = MSGTAG3 + my_id + (num_nodes*edge); 1721 if (my_id > dest) 1722 { 1723 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1724 &msg_id); 1725 MPI_Wait(&msg_id,&status); 1726 /*msg_id = isend(type, work, len,dest,0); 1727 msgwait(msg_id);*/ 1728 } 1729 else 1730 { 1731 type = type - my_id + source; 1732 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1733 type,MPI_COMM_WORLD,&msg_id); 1734 MPI_Wait(&msg_id,&status); 1735 /* msg_id = irecv(type, dptr,len); 1736 msgwait(msg_id);*/ 1737 } 1738 } 1739 1740 #ifdef DEBUG_1 1741 ierror_msg_warning_n0("stage_n = ",stage_n); 1742 #endif 1743 1744 dptr += stage_n; 1745 stage_n <<=1; 1746 mask <<= 1; 1747 mask += 1; 1748 } 1749 1750 size = stage_n; 1751 stage_n >>=1; 1752 dptr -= stage_n; 1753 1754 mask >>= 1; 1755 1756 for (edge = 0; edge < i_log2_num_nodes; edge++) 1757 { 1758 len = (size-stage_n) * REAL_LEN; 1759 1760 if (!(my_id & mask) && stage_n) 1761 { 1762 source = dest = edge_node[i_log2_num_nodes-edge-1]; 1763 type = MSGTAG6 + my_id + (num_nodes*edge); 1764 if (my_id < dest) 1765 { 1766 MPI_Isend(vals,len,INT_TYPE,dest,type,MPI_COMM_WORLD, 1767 &msg_id); 1768 MPI_Wait(&msg_id,&status); 1769 /*msg_id = isend(type, work, len,dest,0); 1770 msg_id = isend(type,dptr,len,dest,0); */ 1771 msgwait(msg_id); 1772 } 1773 else 1774 { 1775 type = type - my_id + source; 1776 MPI_Irecv(dptr,len,INT_TYPE,MPI_ANY_SOURCE, 1777 type,MPI_COMM_WORLD,&msg_id); 1778 MPI_Wait(&msg_id,&status); 1779 /*msg_id = irecv(type,dptr,len); 1780 msgwait(msg_id);*/ 1781 } 1782 } 1783 1784 #ifdef DEBUG_1 1785 ierror_msg_warning_n0("size-stage_n = ",size-stage_n); 1786 #endif 1787 1788 stage_n >>= 1; 
1789 dptr -= stage_n; 1790 mask >>= 1; 1791 } 1792 } 1793 #else 1794 return; 1795 #endif 1796 } 1797 1798 1799 1800 /****************************************************************************** 1801 Function: giop() 1802 1803 Input : 1804 Output: 1805 Return: 1806 Description: 1807 1808 ii+1 entries in seg :: 0 .. ii 1809 1810 ******************************************************************************/ 1811 void 1812 new_ssgl_radd(register REAL *vals, register REAL *work, register int level, 1813 #if defined MPISRC 1814 register int *segs, MPI_Comm ssgl_comm) 1815 #else 1816 register int *segs) 1817 #endif 1818 { 1819 register int edge, type, dest, mask; 1820 register int stage_n; 1821 #if defined MPISRC 1822 MPI_Status status; 1823 #endif 1824 1825 1826 #ifdef DEBUG 1827 if (level > i_log2_num_nodes) 1828 {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");} 1829 #endif 1830 1831 #ifdef SAFE 1832 /* check to make sure comm package has been initialized */ 1833 if (!p_init) 1834 {comm_init();} 1835 #endif 1836 1837 1838 #if defined NXSRC 1839 /* all msgs are *NOT* the same length */ 1840 /* implement the mesh fan in/out exchange algorithm */ 1841 for (mask=0, edge=0; edge<level; edge++, mask++) 1842 { 1843 stage_n = (segs[level] - segs[edge]); 1844 if (stage_n && !(my_id & mask)) 1845 { 1846 dest = edge_node[edge]; 1847 type = MSGTAG3 + my_id + (num_nodes*edge); 1848 if (my_id>dest) 1849 {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} 1850 else 1851 { 1852 type = type - my_id + dest; 1853 crecv(type,work,stage_n*REAL_LEN); 1854 rvec_add(vals+segs[edge], work, stage_n); 1855 /* daxpy(vals+segs[edge], work, stage_n); */ 1856 } 1857 } 1858 mask <<= 1; 1859 } 1860 mask>>=1; 1861 for (edge=0; edge<level; edge++) 1862 { 1863 stage_n = (segs[level] - segs[level-1-edge]); 1864 if (stage_n && !(my_id & mask)) 1865 { 1866 dest = edge_node[level-edge-1]; 1867 type = MSGTAG6 + my_id + (num_nodes*edge); 1868 if (my_id<dest) 1869 
{csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} 1870 else 1871 { 1872 type = type - my_id + dest; 1873 crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); 1874 } 1875 } 1876 mask >>= 1; 1877 } 1878 1879 #elif defined MPISRC 1880 /* all msgs are *NOT* the same length */ 1881 /* implement the mesh fan in/out exchange algorithm */ 1882 for (mask=0, edge=0; edge<level; edge++, mask++) 1883 { 1884 stage_n = (segs[level] - segs[edge]); 1885 if (stage_n && !(my_id & mask)) 1886 { 1887 dest = edge_node[edge]; 1888 type = MSGTAG3 + my_id + (num_nodes*edge); 1889 if (my_id>dest) 1890 /* {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */ 1891 {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type, 1892 ssgl_comm);} 1893 else 1894 { 1895 type = type - my_id + dest; 1896 /* crecv(type,work,stage_n*REAL_LEN); */ 1897 MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type, 1898 ssgl_comm,&status); 1899 rvec_add(vals+segs[edge], work, stage_n); 1900 /* daxpy(vals+segs[edge], work, stage_n); */ 1901 } 1902 } 1903 mask <<= 1; 1904 } 1905 mask>>=1; 1906 for (edge=0; edge<level; edge++) 1907 { 1908 stage_n = (segs[level] - segs[level-1-edge]); 1909 if (stage_n && !(my_id & mask)) 1910 { 1911 dest = edge_node[level-edge-1]; 1912 type = MSGTAG6 + my_id + (num_nodes*edge); 1913 if (my_id<dest) 1914 /* {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */ 1915 {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type, 1916 ssgl_comm);} 1917 else 1918 { 1919 type = type - my_id + dest; 1920 /* crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */ 1921 MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE, 1922 MPI_ANY_SOURCE,type,ssgl_comm,&status); 1923 } 1924 } 1925 mask >>= 1; 1926 } 1927 #else 1928 return; 1929 #endif 1930 } 1931 1932 1933 1934 /***********************************comm.c************************************* 1935 Function: giop() 1936 1937 Input : 1938 Output: 1939 Return: 1940 Description: fan-in/out version 1941 1942 note good only 
for num_nodes=2^k!!! 1943 1944 ***********************************comm.c*************************************/ 1945 void 1946 giop_hc(int *vals, int *work, int n, int *oprs, int dim) 1947 { 1948 register int mask, edge; 1949 int type, dest; 1950 vfp fp; 1951 #if defined MPISRC 1952 MPI_Status status; 1953 #elif defined NXSRC 1954 int len; 1955 #endif 1956 1957 1958 #ifdef SAFE 1959 /* ok ... should have some data, work, and operator(s) */ 1960 if (!vals||!work||!oprs) 1961 {error_msg_fatal("giop_hc() :: vals=%d, work=%d, oprs=%d",vals,work,oprs);} 1962 1963 /* non-uniform should have at least two entries */ 1964 if ((oprs[0] == NON_UNIFORM)&&(n<2)) 1965 {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");} 1966 1967 /* check to make sure comm package has been initialized */ 1968 if (!p_init) 1969 {comm_init();} 1970 #endif 1971 1972 /* if there's nothing to do return */ 1973 if ((num_nodes<2)||(!n)||(dim<=0)) 1974 {return;} 1975 1976 /* the error msg says it all!!! */ 1977 if (modfl_num_nodes) 1978 {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");} 1979 1980 /* a negative number of items to send ==> fatal */ 1981 if (n<0) 1982 {error_msg_fatal("giop_hc() :: n=%d<0?",n);} 1983 1984 /* can't do more dimensions then exist */ 1985 dim = MIN(dim,i_log2_num_nodes); 1986 1987 /* advance to list of n operations for custom */ 1988 if ((type=oprs[0])==NON_UNIFORM) 1989 {oprs++;} 1990 1991 if (!(fp = (vfp) ivec_fct_addr(type))){ 1992 error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n"); 1993 fp = (vfp) oprs; 1994 } 1995 1996 #if defined NXSRC 1997 /* all msgs will be of the same length */ 1998 len = n*INT_LEN; 1999 2000 /* implement the mesh fan in/out exchange algorithm */ 2001 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2002 { 2003 dest = my_id^mask; 2004 if (my_id > dest) 2005 {csend(MSGTAG2+my_id,(char *)vals,len,dest,0); break;} 2006 else 2007 {crecv(MSGTAG2+dest,(char *)work,len); (*fp)(vals, work, n, oprs);} 2008 } 2009 2010 /* for 
(mask=1, edge=1; edge<dim; edge++,mask<<=1) {;} */ 2011 if (edge==dim) 2012 {mask>>=1;} 2013 else 2014 {while (++edge<dim) {mask<<=1;}} 2015 2016 for (edge=0; edge<dim; edge++,mask>>=1) 2017 { 2018 if (my_id%mask) 2019 {continue;} 2020 2021 dest = my_id^mask; 2022 if (my_id < dest) 2023 {csend(MSGTAG4+my_id,(char *)vals,len,dest,0);} 2024 else 2025 {crecv(MSGTAG4+dest,(char *)vals,len);} 2026 } 2027 2028 #elif defined MPISRC 2029 for (mask=1,edge=0; edge<dim; edge++,mask<<=1) 2030 { 2031 dest = my_id^mask; 2032 if (my_id > dest) 2033 {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);} 2034 else 2035 { 2036 MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD, 2037 &status); 2038 (*fp)(vals, work, n, oprs); 2039 } 2040 } 2041 2042 if (edge==dim) 2043 {mask>>=1;} 2044 else 2045 {while (++edge<dim) {mask<<=1;}} 2046 2047 for (edge=0; edge<dim; edge++,mask>>=1) 2048 { 2049 if (my_id%mask) 2050 {continue;} 2051 2052 dest = my_id^mask; 2053 if (my_id < dest) 2054 {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);} 2055 else 2056 { 2057 MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD, 2058 &status); 2059 } 2060 } 2061 #else 2062 return; 2063 #endif 2064 } 2065