1 #define PETSCKSP_DLL 2 3 /***********************************gs.c*************************************** 4 5 Author: Henry M. Tufo III 6 7 e-mail: hmt@cs.brown.edu 8 9 snail-mail: 10 Division of Applied Mathematics 11 Brown University 12 Providence, RI 02912 13 14 Last Modification: 15 6.21.97 16 ************************************gs.c**************************************/ 17 18 /***********************************gs.c*************************************** 19 File Description: 20 ----------------- 21 22 ************************************gs.c**************************************/ 23 24 #include "src/ksp/pc/impls/tfs/tfs.h" 25 26 /* default length of number of items via tree - doubles if exceeded */ 27 #define TREE_BUF_SZ 2048; 28 #define GS_VEC_SZ 1 29 30 31 32 /***********************************gs.c*************************************** 33 Type: struct gather_scatter_id 34 ------------------------------ 35 36 ************************************gs.c**************************************/ 37 typedef struct gather_scatter_id { 38 int id; 39 int nel_min; 40 int nel_max; 41 int nel_sum; 42 int negl; 43 int gl_max; 44 int gl_min; 45 int repeats; 46 int ordered; 47 int positive; 48 PetscScalar *vals; 49 50 /* bit mask info */ 51 int *my_proc_mask; 52 int mask_sz; 53 int *ngh_buf; 54 int ngh_buf_sz; 55 int *nghs; 56 int num_nghs; 57 int max_nghs; 58 int *pw_nghs; 59 int num_pw_nghs; 60 int *tree_nghs; 61 int num_tree_nghs; 62 63 int num_loads; 64 65 /* repeats == true -> local info */ 66 int nel; /* number of unique elememts */ 67 int *elms; /* of size nel */ 68 int nel_total; 69 int *local_elms; /* of size nel_total */ 70 int *companion; /* of size nel_total */ 71 72 /* local info */ 73 int num_local_total; 74 int local_strength; 75 int num_local; 76 int *num_local_reduce; 77 int **local_reduce; 78 int num_local_gop; 79 int *num_gop_local_reduce; 80 int **gop_local_reduce; 81 82 /* pairwise info */ 83 int level; 84 int num_pairs; 85 int max_pairs; 86 int loc_node_pairs; 87 int max_node_pairs; 88 int min_node_pairs; 89 int avg_node_pairs; 90 int *pair_list; 91 int *msg_sizes; 92 int **node_list; 93 int len_pw_list; 94 int *pw_elm_list; 95 PetscScalar *pw_vals; 96 97 MPI_Request *msg_ids_in; 98 MPI_Request *msg_ids_out; 99 100 PetscScalar *out; 101 PetscScalar *in; 102 int msg_total; 103 104 /* tree - crystal accumulator info */ 105 int max_left_over; 106 int *pre; 107 int *in_num; 108 int *out_num; 109 int **in_list; 110 int **out_list; 111 112 /* new tree work*/ 113 int tree_nel; 114 int *tree_elms; 115 PetscScalar *tree_buf; 116 PetscScalar *tree_work; 117 118 int tree_map_sz; 119 int *tree_map_in; 120 int *tree_map_out; 121 122 /* current memory status */ 123 int gl_bss_min; 124 int gl_perm_min; 125 126 /* max segment size for gs_gop_vec() */ 127 int vec_sz; 128 129 /* hack to make paul happy */ 130 MPI_Comm gs_comm; 131 132 } gs_id; 133 134 135 /* to be made public */ 136 137 /* PRIVATE - and definitely not exported */ 138 /*static void gs_print_template( gs_id* gs, int who);*/ 139 /*static void gs_print_stemplate( gs_id* gs, int who);*/ 140 141 static gs_id *gsi_check_args(int *elms, int nel, int level); 142 static void gsi_via_bit_mask(gs_id *gs); 143 static void get_ngh_buf(gs_id *gs); 144 static void set_pairwise(gs_id *gs); 145 static gs_id * gsi_new(void); 146 static void set_tree(gs_id *gs); 147 148 /* same for all but vector flavor */ 149 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals); 150 /* vector flavor */ 151 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step); 152 153 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step); 154 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step); 155 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step); 156 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step); 157 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step); 158 159 160 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals); 161 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals); 162 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals); 163 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals); 164 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals); 165 166 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 167 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim); 168 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim); 169 170 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals); 171 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals); 172 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals); 173 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals); 174 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals); 175 176 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals); 177 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals); 178 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals); 179 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals); 180 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals); 181 182 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals); 183 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals); 184 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals); 185 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals); 186 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals); 187 188 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals); 189 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals); 190 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals); 191 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals); 192 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals); 193 194 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals); 195 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals); 196 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals); 197 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals); 198 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals); 199 200 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals); 201 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals); 202 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals); 203 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals); 204 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals); 205 206 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct); 207 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 208 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 209 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct); 210 211 212 213 /* global vars */ 214 /* from comm.c module */ 215 216 /* module state inf and fortran interface */ 217 static int num_gs_ids = 0; 218 219 /* should make this dynamic ... later */ 220 static int msg_buf=MAX_MSG_BUF; 221 static int vec_sz=GS_VEC_SZ; 222 static int *tree_buf=NULL; 223 static int tree_buf_sz=0; 224 static int ntree=0; 225 226 227 /****************************************************************************** 228 Function: gs_init_() 229 230 Input : 231 Output: 232 Return: 233 Description: 234 ******************************************************************************/ 235 void gs_init_vec_sz(int size) 236 { 237 /* vec_ch = TRUE; */ 238 239 vec_sz = size; 240 } 241 242 /****************************************************************************** 243 Function: gs_init_() 244 245 Input : 246 Output: 247 Return: 248 Description: 249 ******************************************************************************/ 250 void gs_init_msg_buf_sz(int buf_size) 251 { 252 /* msg_ch = TRUE; */ 253 254 msg_buf = buf_size; 255 } 256 257 /****************************************************************************** 258 Function: gs_init() 259 260 Input : 261 262 Output: 263 264 RETURN: 265 266 Description: 267 ******************************************************************************/ 268 gs_id * 269 gs_init( int *elms, int nel, int level) 270 { 271 gs_id *gs; 272 MPI_Group gs_group; 273 MPI_Comm gs_comm; 274 275 /* ensure that communication package has been initialized */ 276 comm_init(); 277 278 279 /* determines if we have enough dynamic/semi-static memory */ 280 /* checks input, allocs and sets gd_id template */ 281 gs = gsi_check_args(elms,nel,level); 282 283 /* only bit mask version up and working for the moment */ 284 /* LATER :: get int list version working for sparse pblms */ 285 gsi_via_bit_mask(gs); 286 287 288 MPI_Comm_group(MPI_COMM_WORLD,&gs_group); 289 MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm); 290 gs->gs_comm=gs_comm; 291 292 return(gs); 293 } 294 295 296 297 /****************************************************************************** 298 Function: gsi_new() 299 300 Input : 301 Output: 302 Return: 303 Description: 304 305 elm list must >= 0!!! 306 elm repeats allowed 307 ******************************************************************************/ 308 static 309 gs_id * 310 gsi_new(void) 311 { 312 gs_id *gs; 313 gs = (gs_id *) malloc(sizeof(gs_id)); 314 PetscMemzero(gs,sizeof(gs_id)); 315 return(gs); 316 } 317 318 319 320 /****************************************************************************** 321 Function: gsi_check_args() 322 323 Input : 324 Output: 325 Return: 326 Description: 327 328 elm list must >= 0!!! 329 elm repeats allowed 330 local working copy of elms is sorted 331 ******************************************************************************/ 332 static 333 gs_id * 334 gsi_check_args(int *in_elms, int nel, int level) 335 { 336 int i, j, k, t2; 337 int *companion, *elms, *unique, *iptr; 338 int num_local=0, *num_to_reduce, **local_reduce; 339 int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND}; 340 int vals[sizeof(oprs)/sizeof(oprs[0])-1]; 341 int work[sizeof(oprs)/sizeof(oprs[0])-1]; 342 gs_id *gs; 343 344 345 346 #ifdef SAFE 347 if (!in_elms) 348 {error_msg_fatal("elms point to nothing!!!\n");} 349 350 if (nel<0) 351 {error_msg_fatal("can't have fewer than 0 elms!!!\n");} 352 353 if (nel==0) 354 {error_msg_warning("I don't have any elements!!!\n");} 355 #endif 356 357 /* get space for gs template */ 358 gs = gsi_new(); 359 gs->id = ++num_gs_ids; 360 361 /* hmt 6.4.99 */ 362 /* caller can set global ids that don't participate to 0 */ 363 /* gs_init ignores all zeros in elm list */ 364 /* negative global ids are still invalid */ 365 for (i=j=0;i<nel;i++) 366 {if (in_elms[i]!=0) {j++;}} 367 368 k=nel; nel=j; 369 370 /* copy over in_elms list and create inverse map */ 371 elms = (int*) malloc((nel+1)*sizeof(PetscInt)); 372 companion = (int*) malloc(nel*sizeof(PetscInt)); 373 /* ivec_c_index(companion,nel); */ 374 /* ivec_copy(elms,in_elms,nel); */ 375 for (i=j=0;i<k;i++) 376 { 377 if (in_elms[i]!=0) 378 {elms[j] = in_elms[i]; companion[j++] = i;} 379 } 380 381 if (j!=nel) 382 {error_msg_fatal("nel j mismatch!\n");} 383 384 #ifdef SAFE 385 /* pre-pass ... check to see if sorted */ 386 elms[nel] = INT_MAX; 387 iptr = elms; 388 unique = elms+1; 389 j=0; 390 while (*iptr!=INT_MAX) 391 { 392 if (*iptr++>*unique++) 393 {j=1; break;} 394 } 395 396 /* set up inverse map */ 397 if (j) 398 { 399 error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n"); 400 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 401 } 402 else 403 {error_msg_warning("gsi_check_args() :: elm list sorted!\n");} 404 #else 405 SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER); 406 #endif 407 elms[nel] = INT_MIN; 408 409 /* first pass */ 410 /* determine number of unique elements, check pd */ 411 for (i=k=0;i<nel;i+=j) 412 { 413 t2 = elms[i]; 414 j=++i; 415 416 /* clump 'em for now */ 417 while (elms[j]==t2) {j++;} 418 419 /* how many together and num local */ 420 if (j-=i) 421 {num_local++; k+=j;} 422 } 423 424 /* how many unique elements? */ 425 gs->repeats=k; 426 gs->nel = nel-k; 427 428 429 /* number of repeats? */ 430 gs->num_local = num_local; 431 num_local+=2; 432 gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*)); 433 gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt)); 434 435 unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt)); 436 gs->elms = unique; 437 gs->nel_total = nel; 438 gs->local_elms = elms; 439 gs->companion = companion; 440 441 /* compess map as well as keep track of local ops */ 442 for (num_local=i=j=0;i<gs->nel;i++) 443 { 444 k=j; 445 t2 = unique[i] = elms[j]; 446 companion[i] = companion[j]; 447 448 while (elms[j]==t2) {j++;} 449 450 if ((t2=(j-k))>1) 451 { 452 /* number together */ 453 num_to_reduce[num_local] = t2++; 454 iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt)); 455 456 /* to use binary searching don't remap until we check intersection */ 457 *iptr++ = i; 458 459 /* note that we're skipping the first one */ 460 while (++k<j) 461 {*(iptr++) = companion[k];} 462 *iptr = -1; 463 } 464 } 465 466 /* sentinel for ngh_buf */ 467 unique[gs->nel]=INT_MAX; 468 469 /* for two partition sort hack */ 470 num_to_reduce[num_local] = 0; 471 local_reduce[num_local] = NULL; 472 num_to_reduce[++num_local] = 0; 473 local_reduce[num_local] = NULL; 474 475 /* load 'em up */ 476 /* note one extra to hold NON_UNIFORM flag!!! */ 477 vals[2] = vals[1] = vals[0] = nel; 478 if (gs->nel>0) 479 { 480 vals[3] = unique[0]; /* ivec_lb(elms,nel); */ 481 vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */ 482 } 483 else 484 { 485 vals[3] = INT_MAX; /* ivec_lb(elms,nel); */ 486 vals[4] = INT_MIN; /* ivec_ub(elms,nel); */ 487 } 488 vals[5] = level; 489 vals[6] = num_gs_ids; 490 491 /* GLOBAL: send 'em out */ 492 giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs); 493 494 /* must be semi-pos def - only pairwise depends on this */ 495 /* LATER - remove this restriction */ 496 if (vals[3]<0) 497 {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);} 498 499 if (vals[4]==INT_MAX) 500 {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);} 501 502 gs->nel_min = vals[0]; 503 gs->nel_max = vals[1]; 504 gs->nel_sum = vals[2]; 505 gs->gl_min = vals[3]; 506 gs->gl_max = vals[4]; 507 gs->negl = vals[4]-vals[3]+1; 508 509 if (gs->negl<=0) 510 {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);} 511 512 /* LATER :: add level == -1 -> program selects level */ 513 if (vals[5]<0) 514 {vals[5]=0;} 515 else if (vals[5]>num_nodes) 516 {vals[5]=num_nodes;} 517 gs->level = vals[5]; 518 519 return(gs); 520 } 521 522 523 /****************************************************************************** 524 Function: gsi_via_bit_mask() 525 526 Input : 527 Output: 528 Return: 529 Description: 530 531 532 ******************************************************************************/ 533 static 534 void 535 gsi_via_bit_mask(gs_id *gs) 536 { 537 int i, nel, *elms; 538 int t1; 539 int **reduce; 540 int *map; 541 542 /* totally local removes ... ct_bits == 0 */ 543 get_ngh_buf(gs); 544 545 if (gs->level) 546 {set_pairwise(gs);} 547 548 if (gs->max_left_over) 549 {set_tree(gs);} 550 551 /* intersection local and pairwise/tree? */ 552 gs->num_local_total = gs->num_local; 553 gs->gop_local_reduce = gs->local_reduce; 554 gs->num_gop_local_reduce = gs->num_local_reduce; 555 556 map = gs->companion; 557 558 /* is there any local compression */ 559 if (!gs->num_local) { 560 gs->local_strength = NONE; 561 gs->num_local_gop = 0; 562 } else { 563 /* ok find intersection */ 564 map = gs->companion; 565 reduce = gs->local_reduce; 566 for (i=0, t1=0; i<gs->num_local; i++, reduce++) 567 { 568 if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0) 569 || 570 ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0) 571 { 572 /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */ 573 t1++; 574 if (gs->num_local_reduce[i]<=0) 575 {error_msg_fatal("nobody in list?");} 576 gs->num_local_reduce[i] *= -1; 577 } 578 **reduce=map[**reduce]; 579 } 580 581 /* intersection is empty */ 582 if (!t1) 583 { 584 gs->local_strength = FULL; 585 gs->num_local_gop = 0; 586 } 587 /* intersection not empty */ 588 else 589 { 590 gs->local_strength = PARTIAL; 591 SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce, 592 gs->num_local + 1, SORT_INT_PTR); 593 594 gs->num_local_gop = t1; 595 gs->num_local_total = gs->num_local; 596 gs->num_local -= t1; 597 gs->gop_local_reduce = gs->local_reduce; 598 gs->num_gop_local_reduce = gs->num_local_reduce; 599 600 for (i=0; i<t1; i++) 601 { 602 if (gs->num_gop_local_reduce[i]>=0) 603 {error_msg_fatal("they aren't negative?");} 604 gs->num_gop_local_reduce[i] *= -1; 605 gs->local_reduce++; 606 gs->num_local_reduce++; 607 } 608 gs->local_reduce++; 609 gs->num_local_reduce++; 610 } 611 } 612 613 elms = gs->pw_elm_list; 614 nel = gs->len_pw_list; 615 for (i=0; i<nel; i++) 616 {elms[i] = map[elms[i]];} 617 618 elms = gs->tree_map_in; 619 nel = gs->tree_map_sz; 620 for (i=0; i<nel; i++) 621 {elms[i] = map[elms[i]];} 622 623 /* clean up */ 624 free((void*) gs->local_elms); 625 free((void*) gs->companion); 626 free((void*) gs->elms); 627 free((void*) gs->ngh_buf); 628 gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL; 629 } 630 631 632 633 /****************************************************************************** 634 Function: place_in_tree() 635 636 Input : 637 Output: 638 Return: 639 Description: 640 641 642 ******************************************************************************/ 643 static 644 void 645 place_in_tree( int elm) 646 { 647 int *tp, n; 648 649 650 if (ntree==tree_buf_sz) 651 { 652 if (tree_buf_sz) 653 { 654 tp = tree_buf; 655 n = tree_buf_sz; 656 tree_buf_sz<<=1; 657 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 658 ivec_copy(tree_buf,tp,n); 659 free(tp); 660 } 661 else 662 { 663 tree_buf_sz = TREE_BUF_SZ; 664 tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt)); 665 } 666 } 667 668 tree_buf[ntree++] = elm; 669 } 670 671 672 673 /****************************************************************************** 674 Function: get_ngh_buf() 675 676 Input : 677 Output: 678 Return: 679 Description: 680 681 682 ******************************************************************************/ 683 static 684 void 685 get_ngh_buf(gs_id *gs) 686 { 687 int i, j, npw=0, ntree_map=0; 688 int p_mask_size, ngh_buf_size, buf_size; 689 int *p_mask, *sh_proc_mask, *pw_sh_proc_mask; 690 int *ngh_buf, *buf1, *buf2; 691 int offset, per_load, num_loads, or_ct, start, end; 692 int *ptr1, *ptr2, i_start, negl, nel, *elms; 693 int oper=GL_B_OR; 694 int *ptr3, *t_mask, level, ct1, ct2; 695 696 /* to make life easier */ 697 nel = gs->nel; 698 elms = gs->elms; 699 level = gs->level; 700 701 /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */ 702 p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes)); 703 set_bit_mask(p_mask,p_mask_size,my_id); 704 705 /* allocate space for masks and info bufs */ 706 gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size); 707 gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size); 708 gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel; 709 t_mask = (int*) malloc(p_mask_size); 710 gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size); 711 712 /* comm buffer size ... memory usage bounded by ~2*msg_buf */ 713 /* had thought I could exploit rendezvous threshold */ 714 715 /* default is one pass */ 716 per_load = negl = gs->negl; 717 gs->num_loads = num_loads = 1; 718 i=p_mask_size*negl; 719 720 /* possible overflow on buffer size */ 721 /* overflow hack */ 722 if (i<0) {i=INT_MAX;} 723 724 buf_size = PetscMin(msg_buf,i); 725 726 /* can we do it? */ 727 if (p_mask_size>buf_size) 728 {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);} 729 730 /* get giop buf space ... make *only* one malloc */ 731 buf1 = (int*) malloc(buf_size<<1); 732 733 /* more than one gior exchange needed? */ 734 if (buf_size!=i) 735 { 736 per_load = buf_size/p_mask_size; 737 buf_size = per_load*p_mask_size; 738 gs->num_loads = num_loads = negl/per_load + (negl%per_load>0); 739 } 740 741 742 /* convert buf sizes from #bytes to #ints - 32 bit only! */ 743 #ifdef SAFE 744 p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt); 745 #else 746 p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2; 747 #endif 748 749 /* find giop work space */ 750 buf2 = buf1+buf_size; 751 752 /* hold #ints needed for processor masks */ 753 gs->mask_sz=p_mask_size; 754 755 /* init buffers */ 756 ivec_zero(sh_proc_mask,p_mask_size); 757 ivec_zero(pw_sh_proc_mask,p_mask_size); 758 ivec_zero(ngh_buf,ngh_buf_size); 759 760 /* HACK reset tree info */ 761 tree_buf=NULL; 762 tree_buf_sz=ntree=0; 763 764 /* queue the tree elements for now */ 765 /* elms_q = new_queue(); */ 766 767 /* can also queue tree info for pruned or forest implememtation */ 768 /* mask_q = new_queue(); */ 769 770 /* ok do it */ 771 for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++) 772 { 773 /* identity for bitwise or is 000...000 */ 774 ivec_zero(buf1,buf_size); 775 776 /* load msg buffer */ 777 for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++) 778 { 779 offset = (offset-start)*p_mask_size; 780 ivec_copy(buf1+offset,p_mask,p_mask_size); 781 } 782 783 /* GLOBAL: pass buffer */ 784 giop(buf1,buf2,buf_size,&oper); 785 786 787 /* unload buffer into ngh_buf */ 788 ptr2=(elms+i_start); 789 for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++) 790 { 791 /* I own it ... may have to pairwise it */ 792 if (j==*ptr2) 793 { 794 /* do i share it w/anyone? */ 795 #ifdef SAFE 796 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 797 #else 798 ct1 = ct_bits((char *)ptr3,p_mask_size<<2); 799 #endif 800 /* guess not */ 801 if (ct1<2) 802 {ptr2++; ptr1+=p_mask_size; continue;} 803 804 /* i do ... so keep info and turn off my bit */ 805 ivec_copy(ptr1,ptr3,p_mask_size); 806 ivec_xor(ptr1,p_mask,p_mask_size); 807 ivec_or(sh_proc_mask,ptr1,p_mask_size); 808 809 /* is it to be done pairwise? */ 810 if (--ct1<=level) 811 { 812 npw++; 813 814 /* turn on high bit to indicate pw need to process */ 815 *ptr2++ |= TOP_BIT; 816 ivec_or(pw_sh_proc_mask,ptr1,p_mask_size); 817 ptr1+=p_mask_size; 818 continue; 819 } 820 821 /* get set for next and note that I have a tree contribution */ 822 /* could save exact elm index for tree here -> save a search */ 823 ptr2++; ptr1+=p_mask_size; ntree_map++; 824 } 825 /* i don't but still might be involved in tree */ 826 else 827 { 828 829 /* shared by how many? */ 830 #ifdef SAFE 831 ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt)); 832 #else 833 ct1 = ct_bits((char *)ptr3,p_mask_size<<2); 834 #endif 835 836 /* none! */ 837 if (ct1<2) 838 {continue;} 839 840 /* is it going to be done pairwise? but not by me of course!*/ 841 if (--ct1<=level) 842 {continue;} 843 } 844 /* LATER we're going to have to process it NOW */ 845 /* nope ... tree it */ 846 place_in_tree(j); 847 } 848 } 849 850 free((void*)t_mask); 851 free((void*)buf1); 852 853 gs->len_pw_list=npw; 854 gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 855 856 /* expand from bit mask list to int list and save ngh list */ 857 gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt)); 858 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs); 859 860 gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt)); 861 862 oper = GL_MAX; 863 ct1 = gs->num_nghs; 864 giop(&ct1,&ct2,1,&oper); 865 gs->max_nghs = ct1; 866 867 gs->tree_map_sz = ntree_map; 868 gs->max_left_over=ntree; 869 870 free((void*)p_mask); 871 free((void*)sh_proc_mask); 872 873 } 874 875 876 877 878 879 /****************************************************************************** 880 Function: pairwise_init() 881 882 Input : 883 Output: 884 Return: 885 Description: 886 887 if an element is shared by fewer that level# of nodes do pairwise exch 888 ******************************************************************************/ 889 static 890 void 891 set_pairwise(gs_id *gs) 892 { 893 int i, j; 894 int p_mask_size; 895 int *p_mask, *sh_proc_mask, *tmp_proc_mask; 896 int *ngh_buf, *buf2; 897 int offset; 898 int *msg_list, *msg_size, **msg_nodes, nprs; 899 int *pairwise_elm_list, len_pair_list=0; 900 int *iptr, t1, i_start, nel, *elms; 901 int ct; 902 903 904 905 /* to make life easier */ 906 nel = gs->nel; 907 elms = gs->elms; 908 ngh_buf = gs->ngh_buf; 909 sh_proc_mask = gs->pw_nghs; 910 911 /* need a few temp masks */ 912 p_mask_size = len_bit_mask(num_nodes); 913 p_mask = (int*) malloc(p_mask_size); 914 tmp_proc_mask = (int*) malloc(p_mask_size); 915 916 /* set mask to my my_id's bit mask */ 917 set_bit_mask(p_mask,p_mask_size,my_id); 918 919 #ifdef SAFE 920 p_mask_size /= sizeof(PetscInt); 921 #else 922 p_mask_size >>= 2; 923 #endif 924 925 len_pair_list=gs->len_pw_list; 926 gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt)); 927 928 /* how many processors (nghs) do we have to exchange with? */ 929 nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt)); 930 931 932 /* allocate space for gs_gop() info */ 933 gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs); 934 gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs); 935 gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1)); 936 937 /* init msg_size list */ 938 ivec_zero(msg_size,nprs); 939 940 /* expand from bit mask list to int list */ 941 bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list); 942 943 /* keep list of elements being handled pairwise */ 944 for (i=j=0;i<nel;i++) 945 { 946 if (elms[i] & TOP_BIT) 947 {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;} 948 } 949 pairwise_elm_list[j] = -1; 950 951 gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 952 gs->msg_ids_out[nprs] = MPI_REQUEST_NULL; 953 gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1)); 954 gs->msg_ids_in[nprs] = MPI_REQUEST_NULL; 955 gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz); 956 957 /* find who goes to each processor */ 958 for (i_start=i=0;i<nprs;i++) 959 { 960 /* processor i's mask */ 961 set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]); 962 963 /* det # going to processor i */ 964 for (ct=j=0;j<len_pair_list;j++) 965 { 966 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 967 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 968 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 969 {ct++;} 970 } 971 msg_size[i] = ct; 972 i_start = PetscMax(i_start,ct); 973 974 /*space to hold nodes in message to first neighbor */ 975 msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1)); 976 977 for (j=0;j<len_pair_list;j++) 978 { 979 buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size); 980 ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size); 981 if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt))) 982 {*iptr++ = j;} 983 } 984 *iptr = -1; 985 } 986 msg_nodes[nprs] = NULL; 987 988 j=gs->loc_node_pairs=i_start; 989 t1 = GL_MAX; 990 giop(&i_start,&offset,1,&t1); 991 gs->max_node_pairs = i_start; 992 993 i_start=j; 994 t1 = GL_MIN; 995 giop(&i_start,&offset,1,&t1); 996 gs->min_node_pairs = i_start; 997 998 i_start=j; 999 t1 = GL_ADD; 1000 giop(&i_start,&offset,1,&t1); 1001 gs->avg_node_pairs = i_start/num_nodes + 1; 1002 1003 i_start=nprs; 1004 t1 = GL_MAX; 1005 giop(&i_start,&offset,1,&t1); 1006 gs->max_pairs = i_start; 1007 1008 1009 /* remap pairwise in tail of gsi_via_bit_mask() */ 1010 gs->msg_total = ivec_sum(gs->msg_sizes,nprs); 1011 gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 1012 gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz); 1013 1014 /* reset malloc pool */ 1015 free((void*)p_mask); 1016 free((void*)tmp_proc_mask); 1017 1018 } 1019 1020 1021 1022 /****************************************************************************** 1023 Function: set_tree() 1024 1025 Input : 1026 Output: 1027 Return: 1028 Description: 1029 1030 to do pruned tree just save ngh buf copy for each one and decode here! 1031 ******************************************************************************/ 1032 static 1033 void 1034 set_tree(gs_id *gs) 1035 { 1036 int i, j, n, nel; 1037 int *iptr_in, *iptr_out, *tree_elms, *elms; 1038 1039 1040 /* local work ptrs */ 1041 elms = gs->elms; 1042 nel = gs->nel; 1043 1044 /* how many via tree */ 1045 gs->tree_nel = n = ntree; 1046 gs->tree_elms = tree_elms = iptr_in = tree_buf; 1047 gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1048 gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz); 1049 j=gs->tree_map_sz; 1050 gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1)); 1051 gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1)); 1052 1053 /* search the longer of the two lists */ 1054 /* note ... could save this info in get_ngh_buf and save searches */ 1055 if (n<=nel) 1056 { 1057 /* bijective fct w/remap - search elm list */ 1058 for (i=0; i<n; i++) 1059 { 1060 if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0) 1061 {*iptr_in++ = j; *iptr_out++ = i;} 1062 } 1063 } 1064 else 1065 { 1066 for (i=0; i<nel; i++) 1067 { 1068 if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0) 1069 {*iptr_in++ = i; *iptr_out++ = j;} 1070 } 1071 } 1072 1073 /* sentinel */ 1074 *iptr_in = *iptr_out = -1; 1075 1076 } 1077 1078 1079 /****************************************************************************** 1080 Function: gather_scatter 1081 1082 Input : 1083 Output: 1084 Return: 1085 Description: 1086 ******************************************************************************/ 1087 static 1088 void 1089 gs_gop_local_out( gs_id *gs, PetscScalar *vals) 1090 { 1091 int *num, *map, **reduce; 1092 PetscScalar tmp; 1093 1094 1095 num = gs->num_gop_local_reduce; 1096 reduce = gs->gop_local_reduce; 1097 while ((map = *reduce++)) 1098 { 1099 /* wall */ 1100 if (*num == 2) 1101 { 1102 num ++; 1103 vals[map[1]] = vals[map[0]]; 1104 } 1105 /* corner shared by three elements */ 1106 else if (*num == 3) 1107 { 1108 num ++; 1109 vals[map[2]] = vals[map[1]] = vals[map[0]]; 1110 } 1111 /* corner shared by four elements */ 1112 else if (*num == 4) 1113 { 1114 num ++; 1115 vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]]; 1116 } 1117 /* general case ... odd geoms ... 3D*/ 1118 else 1119 { 1120 num++; 1121 tmp = *(vals + *map++); 1122 while (*map >= 0) 1123 {*(vals + *map++) = tmp;} 1124 } 1125 } 1126 } 1127 1128 1129 1130 /****************************************************************************** 1131 Function: gather_scatter 1132 1133 Input : 1134 Output: 1135 Return: 1136 Description: 1137 ******************************************************************************/ 1138 void 1139 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct) 1140 { 1141 /* local only operations!!! */ 1142 if (gs->num_local) 1143 {gs_gop_local_binary(gs,vals,fct);} 1144 1145 /* if intersection tree/pairwise and local isn't empty */ 1146 if (gs->num_local_gop) 1147 { 1148 gs_gop_local_in_binary(gs,vals,fct); 1149 1150 /* pairwise */ 1151 if (gs->num_pairs) 1152 {gs_gop_pairwise_binary(gs,vals,fct);} 1153 1154 /* tree */ 1155 else if (gs->max_left_over) 1156 {gs_gop_tree_binary(gs,vals,fct);} 1157 1158 gs_gop_local_out(gs,vals); 1159 } 1160 /* if intersection tree/pairwise and local is empty */ 1161 else 1162 { 1163 /* pairwise */ 1164 if (gs->num_pairs) 1165 {gs_gop_pairwise_binary(gs,vals,fct);} 1166 1167 /* tree */ 1168 else if (gs->max_left_over) 1169 {gs_gop_tree_binary(gs,vals,fct);} 1170 } 1171 } 1172 1173 1174 1175 /****************************************************************************** 1176 Function: gather_scatter 1177 1178 Input : 1179 Output: 1180 Return: 1181 Description: 1182 ******************************************************************************/ 1183 static 1184 void 1185 gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1186 { 1187 int *num, *map, **reduce; 1188 PetscScalar tmp; 1189 1190 1191 num = gs->num_local_reduce; 1192 reduce = gs->local_reduce; 1193 while ((map = *reduce)) 1194 { 1195 num ++; 1196 (*fct)(&tmp,NULL,1); 1197 /* tmp = 0.0; */ 1198 while (*map >= 0) 1199 {(*fct)(&tmp,(vals + *map),1); map++;} 1200 /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */ 1201 1202 map = *reduce++; 1203 while (*map >= 0) 1204 {*(vals + *map++) = tmp;} 1205 } 1206 } 1207 1208 1209 1210 /****************************************************************************** 1211 Function: gather_scatter 1212 1213 Input : 1214 Output: 1215 Return: 1216 Description: 1217 ******************************************************************************/ 1218 static 1219 void 1220 gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct) 1221 { 1222 int *num, *map, **reduce; 1223 PetscScalar *base; 1224 1225 1226 num = gs->num_gop_local_reduce; 1227 1228 reduce = gs->gop_local_reduce; 1229 while ((map = *reduce++)) 1230 { 1231 num++; 1232 base = vals + *map++; 1233 while (*map >= 0) 1234 {(*fct)(base,(vals + *map),1); map++;} 1235 } 1236 } 1237 1238 1239 1240 /****************************************************************************** 1241 Function: gather_scatter 1242 1243 VERSION 3 :: 1244 1245 Input : 1246 Output: 1247 Return: 1248 Description: 1249 ******************************************************************************/ 1250 static 1251 void 1252 gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals, 1253 rbfp fct) 1254 { 1255 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1256 int *iptr, *msg_list, *msg_size, **msg_nodes; 1257 int *pw, *list, *size, **nodes; 1258 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1259 MPI_Status status; 1260 1261 1262 /* strip and load s */ 1263 msg_list =list = gs->pair_list; 1264 msg_size =size = gs->msg_sizes; 1265 msg_nodes=nodes = gs->node_list; 1266 iptr=pw = gs->pw_elm_list; 1267 dptr1=dptr3 = gs->pw_vals; 1268 msg_ids_in = ids_in = gs->msg_ids_in; 1269 msg_ids_out = ids_out = gs->msg_ids_out; 1270 dptr2 = gs->out; 1271 in1=in2 = gs->in; 1272 1273 /* post the receives */ 1274 /* msg_nodes=nodes; */ 1275 do 1276 { 1277 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1278 second one *list and do list++ afterwards */ 1279 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1280 gs->gs_comm, msg_ids_in++); 1281 in1 += *size++; 1282 } 1283 while (*++msg_nodes); 1284 msg_nodes=nodes; 1285 1286 /* load gs values into in out gs buffers */ 1287 while (*iptr >= 0) 1288 {*dptr3++ = *(in_vals + *iptr++);} 1289 1290 /* load out buffers and post the sends */ 1291 while ((iptr = *msg_nodes++)) 1292 { 1293 dptr3 = dptr2; 1294 while (*iptr >= 0) 1295 {*dptr2++ = *(dptr1 + *iptr++);} 1296 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1297 /* is msg_ids_out++ correct? */ 1298 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1299 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1300 } 1301 1302 if (gs->max_left_over) 1303 {gs_gop_tree_binary(gs,in_vals,fct);} 1304 1305 /* process the received data */ 1306 msg_nodes=nodes; 1307 while ((iptr = *nodes++)) 1308 { 1309 /* Should I check the return value of MPI_Wait() or status? */ 1310 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1311 MPI_Wait(ids_in++, &status); 1312 while (*iptr >= 0) 1313 {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;} 1314 /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */ 1315 } 1316 1317 /* replace vals */ 1318 while (*pw >= 0) 1319 {*(in_vals + *pw++) = *dptr1++;} 1320 1321 /* clear isend message handles */ 1322 /* This changed for clarity though it could be the same */ 1323 while (*msg_nodes++) 1324 /* Should I check the return value of MPI_Wait() or status? */ 1325 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1326 {MPI_Wait(ids_out++, &status);} 1327 } 1328 1329 1330 1331 /****************************************************************************** 1332 Function: gather_scatter 1333 1334 Input : 1335 Output: 1336 Return: 1337 Description: 1338 ******************************************************************************/ 1339 static 1340 void 1341 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct) 1342 { 1343 int size; 1344 int *in, *out; 1345 PetscScalar *buf, *work; 1346 1347 in = gs->tree_map_in; 1348 out = gs->tree_map_out; 1349 buf = gs->tree_buf; 1350 work = gs->tree_work; 1351 size = gs->tree_nel; 1352 1353 /* load vals vector w/identity */ 1354 (*fct)(buf,NULL,size); 1355 1356 /* load my contribution into val vector */ 1357 while (*in >= 0) 1358 {(*fct)((buf + *out++),(vals + *in++),-1);} 1359 1360 gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0); 1361 1362 in = gs->tree_map_in; 1363 out = gs->tree_map_out; 1364 while (*in >= 0) 1365 {(*fct)((vals + *in++),(buf + *out++),-1);} 1366 1367 } 1368 1369 1370 1371 1372 /****************************************************************************** 1373 Function: gather_scatter 1374 1375 Input : 1376 Output: 1377 Return: 1378 Description: 1379 ******************************************************************************/ 1380 void 1381 gs_gop( gs_id *gs, PetscScalar *vals, const char *op) 1382 { 1383 switch (*op) { 1384 case '+': 1385 gs_gop_plus(gs,vals); 1386 break; 1387 case '*': 1388 gs_gop_times(gs,vals); 1389 break; 1390 case 'a': 1391 gs_gop_min_abs(gs,vals); 1392 break; 1393 case 'A': 1394 gs_gop_max_abs(gs,vals); 1395 break; 1396 case 'e': 1397 gs_gop_exists(gs,vals); 1398 break; 1399 case 'm': 1400 gs_gop_min(gs,vals); 1401 break; 1402 case 'M': 1403 gs_gop_max(gs,vals); break; 1404 /* 1405 if (*(op+1)=='\0') 1406 {gs_gop_max(gs,vals); break;} 1407 else if (*(op+1)=='X') 1408 {gs_gop_max_abs(gs,vals); break;} 1409 else if (*(op+1)=='N') 1410 {gs_gop_min_abs(gs,vals); break;} 1411 */ 1412 default: 1413 error_msg_warning("gs_gop() :: %c is not a valid op",op[0]); 1414 error_msg_warning("gs_gop() :: default :: plus"); 1415 gs_gop_plus(gs,vals); 1416 break; 1417 } 1418 } 1419 1420 1421 /****************************************************************************** 1422 Function: gather_scatter 1423 1424 Input : 1425 Output: 1426 Return: 1427 Description: 1428 ******************************************************************************/ 1429 static void 1430 gs_gop_exists( gs_id *gs, PetscScalar *vals) 1431 { 1432 /* local only operations!!! */ 1433 if (gs->num_local) 1434 {gs_gop_local_exists(gs,vals);} 1435 1436 /* if intersection tree/pairwise and local isn't empty */ 1437 if (gs->num_local_gop) 1438 { 1439 gs_gop_local_in_exists(gs,vals); 1440 1441 /* pairwise */ 1442 if (gs->num_pairs) 1443 {gs_gop_pairwise_exists(gs,vals);} 1444 1445 /* tree */ 1446 else if (gs->max_left_over) 1447 {gs_gop_tree_exists(gs,vals);} 1448 1449 gs_gop_local_out(gs,vals); 1450 } 1451 /* if intersection tree/pairwise and local is empty */ 1452 else 1453 { 1454 /* pairwise */ 1455 if (gs->num_pairs) 1456 {gs_gop_pairwise_exists(gs,vals);} 1457 1458 /* tree */ 1459 else if (gs->max_left_over) 1460 {gs_gop_tree_exists(gs,vals);} 1461 } 1462 } 1463 1464 1465 1466 /****************************************************************************** 1467 Function: gather_scatter 1468 1469 Input : 1470 Output: 1471 Return: 1472 Description: 1473 ******************************************************************************/ 1474 static 1475 void 1476 gs_gop_local_exists( gs_id *gs, PetscScalar *vals) 1477 { 1478 int *num, *map, **reduce; 1479 PetscScalar tmp; 1480 1481 1482 num = gs->num_local_reduce; 1483 reduce = gs->local_reduce; 1484 while ((map = *reduce)) 1485 { 1486 num ++; 1487 tmp = 0.0; 1488 while (*map >= 0) 1489 {tmp = EXISTS(tmp,*(vals + *map)); map++;} 1490 1491 map = *reduce++; 1492 while (*map >= 0) 1493 {*(vals + *map++) = tmp;} 1494 } 1495 } 1496 1497 1498 1499 /****************************************************************************** 1500 Function: gather_scatter 1501 1502 Input : 1503 Output: 1504 Return: 1505 Description: 1506 ******************************************************************************/ 1507 static 1508 void 1509 gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals) 1510 { 1511 int *num, *map, **reduce; 1512 PetscScalar *base; 1513 1514 1515 num = gs->num_gop_local_reduce; 1516 reduce = gs->gop_local_reduce; 1517 while ((map = *reduce++)) 1518 { 1519 num++; 1520 base = vals + *map++; 1521 while (*map >= 0) 1522 {*base = EXISTS(*base,*(vals + *map)); map++;} 1523 } 1524 } 1525 1526 1527 1528 /****************************************************************************** 1529 Function: gather_scatter 1530 1531 VERSION 3 :: 1532 1533 Input : 1534 Output: 1535 Return: 1536 Description: 1537 ******************************************************************************/ 1538 static 1539 void 1540 gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals) 1541 { 1542 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1543 int *iptr, *msg_list, *msg_size, **msg_nodes; 1544 int *pw, *list, *size, **nodes; 1545 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1546 MPI_Status status; 1547 1548 1549 /* strip and load s */ 1550 msg_list =list = gs->pair_list; 1551 msg_size =size = gs->msg_sizes; 1552 msg_nodes=nodes = gs->node_list; 1553 iptr=pw = gs->pw_elm_list; 1554 dptr1=dptr3 = gs->pw_vals; 1555 msg_ids_in = ids_in = gs->msg_ids_in; 1556 msg_ids_out = ids_out = gs->msg_ids_out; 1557 dptr2 = gs->out; 1558 in1=in2 = gs->in; 1559 1560 /* post the receives */ 1561 /* msg_nodes=nodes; */ 1562 do 1563 { 1564 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1565 second one *list and do list++ afterwards */ 1566 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1567 gs->gs_comm, msg_ids_in++); 1568 in1 += *size++; 1569 } 1570 while (*++msg_nodes); 1571 msg_nodes=nodes; 1572 1573 /* load gs values into in out gs buffers */ 1574 while (*iptr >= 0) 1575 {*dptr3++ = *(in_vals + *iptr++);} 1576 1577 /* load out buffers and post the sends */ 1578 while ((iptr = *msg_nodes++)) 1579 { 1580 dptr3 = dptr2; 1581 while (*iptr >= 0) 1582 {*dptr2++ = *(dptr1 + *iptr++);} 1583 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1584 /* is msg_ids_out++ correct? */ 1585 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1586 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1587 } 1588 1589 if (gs->max_left_over) 1590 {gs_gop_tree_exists(gs,in_vals);} 1591 1592 /* process the received data */ 1593 msg_nodes=nodes; 1594 while ((iptr = *nodes++)) 1595 { 1596 /* Should I check the return value of MPI_Wait() or status? */ 1597 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1598 MPI_Wait(ids_in++, &status); 1599 while (*iptr >= 0) 1600 {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1601 } 1602 1603 /* replace vals */ 1604 while (*pw >= 0) 1605 {*(in_vals + *pw++) = *dptr1++;} 1606 1607 /* clear isend message handles */ 1608 /* This changed for clarity though it could be the same */ 1609 while (*msg_nodes++) 1610 /* Should I check the return value of MPI_Wait() or status? */ 1611 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1612 {MPI_Wait(ids_out++, &status);} 1613 } 1614 1615 1616 1617 /****************************************************************************** 1618 Function: gather_scatter 1619 1620 Input : 1621 Output: 1622 Return: 1623 Description: 1624 ******************************************************************************/ 1625 static 1626 void 1627 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals) 1628 { 1629 int size; 1630 int *in, *out; 1631 PetscScalar *buf, *work; 1632 int op[] = {GL_EXISTS,0}; 1633 1634 1635 in = gs->tree_map_in; 1636 out = gs->tree_map_out; 1637 buf = gs->tree_buf; 1638 work = gs->tree_work; 1639 size = gs->tree_nel; 1640 1641 rvec_zero(buf,size); 1642 1643 while (*in >= 0) 1644 { 1645 /* 1646 printf("%d :: out=%d\n",my_id,*out); 1647 printf("%d :: in=%d\n",my_id,*in); 1648 */ 1649 *(buf + *out++) = *(vals + *in++); 1650 } 1651 1652 grop(buf,work,size,op); 1653 1654 in = gs->tree_map_in; 1655 out = gs->tree_map_out; 1656 1657 while (*in >= 0) 1658 {*(vals + *in++) = *(buf + *out++);} 1659 1660 } 1661 1662 1663 1664 /****************************************************************************** 1665 Function: gather_scatter 1666 1667 Input : 1668 Output: 1669 Return: 1670 Description: 1671 ******************************************************************************/ 1672 static void 1673 gs_gop_max_abs( gs_id *gs, PetscScalar *vals) 1674 { 1675 /* local only operations!!! */ 1676 if (gs->num_local) 1677 {gs_gop_local_max_abs(gs,vals);} 1678 1679 /* if intersection tree/pairwise and local isn't empty */ 1680 if (gs->num_local_gop) 1681 { 1682 gs_gop_local_in_max_abs(gs,vals); 1683 1684 /* pairwise */ 1685 if (gs->num_pairs) 1686 {gs_gop_pairwise_max_abs(gs,vals);} 1687 1688 /* tree */ 1689 else if (gs->max_left_over) 1690 {gs_gop_tree_max_abs(gs,vals);} 1691 1692 gs_gop_local_out(gs,vals); 1693 } 1694 /* if intersection tree/pairwise and local is empty */ 1695 else 1696 { 1697 /* pairwise */ 1698 if (gs->num_pairs) 1699 {gs_gop_pairwise_max_abs(gs,vals);} 1700 1701 /* tree */ 1702 else if (gs->max_left_over) 1703 {gs_gop_tree_max_abs(gs,vals);} 1704 } 1705 } 1706 1707 1708 1709 /****************************************************************************** 1710 Function: gather_scatter 1711 1712 Input : 1713 Output: 1714 Return: 1715 Description: 1716 ******************************************************************************/ 1717 static 1718 void 1719 gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals) 1720 { 1721 int *num, *map, **reduce; 1722 PetscScalar tmp; 1723 1724 1725 num = gs->num_local_reduce; 1726 reduce = gs->local_reduce; 1727 while ((map = *reduce)) 1728 { 1729 num ++; 1730 tmp = 0.0; 1731 while (*map >= 0) 1732 {tmp = MAX_FABS(tmp,*(vals + *map)); map++;} 1733 1734 map = *reduce++; 1735 while (*map >= 0) 1736 {*(vals + *map++) = tmp;} 1737 } 1738 } 1739 1740 1741 1742 /****************************************************************************** 1743 Function: gather_scatter 1744 1745 Input : 1746 Output: 1747 Return: 1748 Description: 1749 ******************************************************************************/ 1750 static 1751 void 1752 gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals) 1753 { 1754 int *num, *map, **reduce; 1755 PetscScalar *base; 1756 1757 1758 num = gs->num_gop_local_reduce; 1759 reduce = gs->gop_local_reduce; 1760 while ((map = *reduce++)) 1761 { 1762 num++; 1763 base = vals + *map++; 1764 while (*map >= 0) 1765 {*base = MAX_FABS(*base,*(vals + *map)); map++;} 1766 } 1767 } 1768 1769 1770 1771 /****************************************************************************** 1772 Function: gather_scatter 1773 1774 VERSION 3 :: 1775 1776 Input : 1777 Output: 1778 Return: 1779 Description: 1780 ******************************************************************************/ 1781 static 1782 void 1783 gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals) 1784 { 1785 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 1786 int *iptr, *msg_list, *msg_size, **msg_nodes; 1787 int *pw, *list, *size, **nodes; 1788 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 1789 MPI_Status status; 1790 1791 1792 /* strip and load s */ 1793 msg_list =list = gs->pair_list; 1794 msg_size =size = gs->msg_sizes; 1795 msg_nodes=nodes = gs->node_list; 1796 iptr=pw = gs->pw_elm_list; 1797 dptr1=dptr3 = gs->pw_vals; 1798 msg_ids_in = ids_in = gs->msg_ids_in; 1799 msg_ids_out = ids_out = gs->msg_ids_out; 1800 dptr2 = gs->out; 1801 in1=in2 = gs->in; 1802 1803 /* post the receives */ 1804 /* msg_nodes=nodes; */ 1805 do 1806 { 1807 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 1808 second one *list and do list++ afterwards */ 1809 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 1810 gs->gs_comm, msg_ids_in++); 1811 in1 += *size++; 1812 } 1813 while (*++msg_nodes); 1814 msg_nodes=nodes; 1815 1816 /* load gs values into in out gs buffers */ 1817 while (*iptr >= 0) 1818 {*dptr3++ = *(in_vals + *iptr++);} 1819 1820 /* load out buffers and post the sends */ 1821 while ((iptr = *msg_nodes++)) 1822 { 1823 dptr3 = dptr2; 1824 while (*iptr >= 0) 1825 {*dptr2++ = *(dptr1 + *iptr++);} 1826 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 1827 /* is msg_ids_out++ correct? */ 1828 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 1829 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 1830 } 1831 1832 if (gs->max_left_over) 1833 {gs_gop_tree_max_abs(gs,in_vals);} 1834 1835 /* process the received data */ 1836 msg_nodes=nodes; 1837 while ((iptr = *nodes++)) 1838 { 1839 /* Should I check the return value of MPI_Wait() or status? */ 1840 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1841 MPI_Wait(ids_in++, &status); 1842 while (*iptr >= 0) 1843 {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 1844 } 1845 1846 /* replace vals */ 1847 while (*pw >= 0) 1848 {*(in_vals + *pw++) = *dptr1++;} 1849 1850 /* clear isend message handles */ 1851 /* This changed for clarity though it could be the same */ 1852 while (*msg_nodes++) 1853 /* Should I check the return value of MPI_Wait() or status? */ 1854 /* Can this loop be replaced by a call to MPI_Waitall()? */ 1855 {MPI_Wait(ids_out++, &status);} 1856 } 1857 1858 1859 1860 /****************************************************************************** 1861 Function: gather_scatter 1862 1863 Input : 1864 Output: 1865 Return: 1866 Description: 1867 ******************************************************************************/ 1868 static 1869 void 1870 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals) 1871 { 1872 int size; 1873 int *in, *out; 1874 PetscScalar *buf, *work; 1875 int op[] = {GL_MAX_ABS,0}; 1876 1877 1878 in = gs->tree_map_in; 1879 out = gs->tree_map_out; 1880 buf = gs->tree_buf; 1881 work = gs->tree_work; 1882 size = gs->tree_nel; 1883 1884 rvec_zero(buf,size); 1885 1886 while (*in >= 0) 1887 { 1888 /* 1889 printf("%d :: out=%d\n",my_id,*out); 1890 printf("%d :: in=%d\n",my_id,*in); 1891 */ 1892 *(buf + *out++) = *(vals + *in++); 1893 } 1894 1895 grop(buf,work,size,op); 1896 1897 in = gs->tree_map_in; 1898 out = gs->tree_map_out; 1899 1900 while (*in >= 0) 1901 {*(vals + *in++) = *(buf + *out++);} 1902 1903 } 1904 1905 1906 1907 /****************************************************************************** 1908 Function: gather_scatter 1909 1910 Input : 1911 Output: 1912 Return: 1913 Description: 1914 ******************************************************************************/ 1915 static void 1916 gs_gop_max( gs_id *gs, PetscScalar *vals) 1917 { 1918 1919 /* local only operations!!! */ 1920 if (gs->num_local) 1921 {gs_gop_local_max(gs,vals);} 1922 1923 /* if intersection tree/pairwise and local isn't empty */ 1924 if (gs->num_local_gop) 1925 { 1926 gs_gop_local_in_max(gs,vals); 1927 1928 /* pairwise */ 1929 if (gs->num_pairs) 1930 {gs_gop_pairwise_max(gs,vals);} 1931 1932 /* tree */ 1933 else if (gs->max_left_over) 1934 {gs_gop_tree_max(gs,vals);} 1935 1936 gs_gop_local_out(gs,vals); 1937 } 1938 /* if intersection tree/pairwise and local is empty */ 1939 else 1940 { 1941 /* pairwise */ 1942 if (gs->num_pairs) 1943 {gs_gop_pairwise_max(gs,vals);} 1944 1945 /* tree */ 1946 else if (gs->max_left_over) 1947 {gs_gop_tree_max(gs,vals);} 1948 } 1949 } 1950 1951 1952 1953 /****************************************************************************** 1954 Function: gather_scatter 1955 1956 Input : 1957 Output: 1958 Return: 1959 Description: 1960 ******************************************************************************/ 1961 static 1962 void 1963 gs_gop_local_max( gs_id *gs, PetscScalar *vals) 1964 { 1965 int *num, *map, **reduce; 1966 PetscScalar tmp; 1967 1968 1969 num = gs->num_local_reduce; 1970 reduce = gs->local_reduce; 1971 while ((map = *reduce)) 1972 { 1973 num ++; 1974 tmp = -REAL_MAX; 1975 while (*map >= 0) 1976 {tmp = PetscMax(tmp,*(vals + *map)); map++;} 1977 1978 map = *reduce++; 1979 while (*map >= 0) 1980 {*(vals + *map++) = tmp;} 1981 } 1982 } 1983 1984 1985 1986 /****************************************************************************** 1987 Function: gather_scatter 1988 1989 Input : 1990 Output: 1991 Return: 1992 Description: 1993 ******************************************************************************/ 1994 static 1995 void 1996 gs_gop_local_in_max( gs_id *gs, PetscScalar *vals) 1997 { 1998 int *num, *map, **reduce; 1999 PetscScalar *base; 2000 2001 2002 num = gs->num_gop_local_reduce; 2003 reduce = gs->gop_local_reduce; 2004 while ((map = *reduce++)) 2005 { 2006 num++; 2007 base = vals + *map++; 2008 while (*map >= 0) 2009 {*base = PetscMax(*base,*(vals + *map)); map++;} 2010 } 2011 } 2012 2013 2014 2015 /****************************************************************************** 2016 Function: gather_scatter 2017 2018 VERSION 3 :: 2019 2020 Input : 2021 Output: 2022 Return: 2023 Description: 2024 ******************************************************************************/ 2025 static 2026 void 2027 gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals) 2028 { 2029 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2030 int *iptr, *msg_list, *msg_size, **msg_nodes; 2031 int *pw, *list, *size, **nodes; 2032 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2033 MPI_Status status; 2034 2035 2036 /* strip and load s */ 2037 msg_list =list = gs->pair_list; 2038 msg_size =size = gs->msg_sizes; 2039 msg_nodes=nodes = gs->node_list; 2040 iptr=pw = gs->pw_elm_list; 2041 dptr1=dptr3 = gs->pw_vals; 2042 msg_ids_in = ids_in = gs->msg_ids_in; 2043 msg_ids_out = ids_out = gs->msg_ids_out; 2044 dptr2 = gs->out; 2045 in1=in2 = gs->in; 2046 2047 /* post the receives */ 2048 /* msg_nodes=nodes; */ 2049 do 2050 { 2051 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2052 second one *list and do list++ afterwards */ 2053 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2054 gs->gs_comm, msg_ids_in++); 2055 in1 += *size++; 2056 } 2057 while (*++msg_nodes); 2058 msg_nodes=nodes; 2059 2060 /* load gs values into in out gs buffers */ 2061 while (*iptr >= 0) 2062 {*dptr3++ = *(in_vals + *iptr++);} 2063 2064 /* load out buffers and post the sends */ 2065 while ((iptr = *msg_nodes++)) 2066 { 2067 dptr3 = dptr2; 2068 while (*iptr >= 0) 2069 {*dptr2++ = *(dptr1 + *iptr++);} 2070 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2071 /* is msg_ids_out++ correct? */ 2072 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2073 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2074 } 2075 2076 if (gs->max_left_over) 2077 {gs_gop_tree_max(gs,in_vals);} 2078 2079 /* process the received data */ 2080 msg_nodes=nodes; 2081 while ((iptr = *nodes++)) 2082 { 2083 /* Should I check the return value of MPI_Wait() or status? */ 2084 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2085 MPI_Wait(ids_in++, &status); 2086 while (*iptr >= 0) 2087 {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2088 } 2089 2090 /* replace vals */ 2091 while (*pw >= 0) 2092 {*(in_vals + *pw++) = *dptr1++;} 2093 2094 /* clear isend message handles */ 2095 /* This changed for clarity though it could be the same */ 2096 while (*msg_nodes++) 2097 /* Should I check the return value of MPI_Wait() or status? */ 2098 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2099 {MPI_Wait(ids_out++, &status);} 2100 } 2101 2102 2103 2104 /****************************************************************************** 2105 Function: gather_scatter 2106 2107 Input : 2108 Output: 2109 Return: 2110 Description: 2111 ******************************************************************************/ 2112 static 2113 void 2114 gs_gop_tree_max(gs_id *gs, PetscScalar *vals) 2115 { 2116 int size; 2117 int *in, *out; 2118 PetscScalar *buf, *work; 2119 2120 in = gs->tree_map_in; 2121 out = gs->tree_map_out; 2122 buf = gs->tree_buf; 2123 work = gs->tree_work; 2124 size = gs->tree_nel; 2125 2126 rvec_set(buf,-REAL_MAX,size); 2127 2128 while (*in >= 0) 2129 {*(buf + *out++) = *(vals + *in++);} 2130 2131 in = gs->tree_map_in; 2132 out = gs->tree_map_out; 2133 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm); 2134 while (*in >= 0) 2135 {*(vals + *in++) = *(work + *out++);} 2136 2137 } 2138 2139 2140 2141 /****************************************************************************** 2142 Function: gather_scatter 2143 2144 Input : 2145 Output: 2146 Return: 2147 Description: 2148 ******************************************************************************/ 2149 static void 2150 gs_gop_min_abs( gs_id *gs, PetscScalar *vals) 2151 { 2152 2153 /* local only operations!!! */ 2154 if (gs->num_local) 2155 {gs_gop_local_min_abs(gs,vals);} 2156 2157 /* if intersection tree/pairwise and local isn't empty */ 2158 if (gs->num_local_gop) 2159 { 2160 gs_gop_local_in_min_abs(gs,vals); 2161 2162 /* pairwise */ 2163 if (gs->num_pairs) 2164 {gs_gop_pairwise_min_abs(gs,vals);} 2165 2166 /* tree */ 2167 else if (gs->max_left_over) 2168 {gs_gop_tree_min_abs(gs,vals);} 2169 2170 gs_gop_local_out(gs,vals); 2171 } 2172 /* if intersection tree/pairwise and local is empty */ 2173 else 2174 { 2175 /* pairwise */ 2176 if (gs->num_pairs) 2177 {gs_gop_pairwise_min_abs(gs,vals);} 2178 2179 /* tree */ 2180 else if (gs->max_left_over) 2181 {gs_gop_tree_min_abs(gs,vals);} 2182 } 2183 } 2184 2185 2186 2187 /****************************************************************************** 2188 Function: gather_scatter 2189 2190 Input : 2191 Output: 2192 Return: 2193 Description: 2194 ******************************************************************************/ 2195 static 2196 void 2197 gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals) 2198 { 2199 int *num, *map, **reduce; 2200 PetscScalar tmp; 2201 2202 2203 num = gs->num_local_reduce; 2204 reduce = gs->local_reduce; 2205 while ((map = *reduce)) 2206 { 2207 num ++; 2208 tmp = REAL_MAX; 2209 while (*map >= 0) 2210 {tmp = MIN_FABS(tmp,*(vals + *map)); map++;} 2211 2212 map = *reduce++; 2213 while (*map >= 0) 2214 {*(vals + *map++) = tmp;} 2215 } 2216 } 2217 2218 2219 2220 /****************************************************************************** 2221 Function: gather_scatter 2222 2223 Input : 2224 Output: 2225 Return: 2226 Description: 2227 ******************************************************************************/ 2228 static 2229 void 2230 gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals) 2231 { 2232 int *num, *map, **reduce; 2233 PetscScalar *base; 2234 2235 num = gs->num_gop_local_reduce; 2236 reduce = gs->gop_local_reduce; 2237 while ((map = *reduce++)) 2238 { 2239 num++; 2240 base = vals + *map++; 2241 while (*map >= 0) 2242 {*base = MIN_FABS(*base,*(vals + *map)); map++;} 2243 } 2244 } 2245 2246 2247 2248 /****************************************************************************** 2249 Function: gather_scatter 2250 2251 VERSION 3 :: 2252 2253 Input : 2254 Output: 2255 Return: 2256 Description: 2257 ******************************************************************************/ 2258 static 2259 void 2260 gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals) 2261 { 2262 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2263 int *iptr, *msg_list, *msg_size, **msg_nodes; 2264 int *pw, *list, *size, **nodes; 2265 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2266 MPI_Status status; 2267 2268 2269 /* strip and load s */ 2270 msg_list =list = gs->pair_list; 2271 msg_size =size = gs->msg_sizes; 2272 msg_nodes=nodes = gs->node_list; 2273 iptr=pw = gs->pw_elm_list; 2274 dptr1=dptr3 = gs->pw_vals; 2275 msg_ids_in = ids_in = gs->msg_ids_in; 2276 msg_ids_out = ids_out = gs->msg_ids_out; 2277 dptr2 = gs->out; 2278 in1=in2 = gs->in; 2279 2280 /* post the receives */ 2281 /* msg_nodes=nodes; */ 2282 do 2283 { 2284 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2285 second one *list and do list++ afterwards */ 2286 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2287 gs->gs_comm, msg_ids_in++); 2288 in1 += *size++; 2289 } 2290 while (*++msg_nodes); 2291 msg_nodes=nodes; 2292 2293 /* load gs values into in out gs buffers */ 2294 while (*iptr >= 0) 2295 {*dptr3++ = *(in_vals + *iptr++);} 2296 2297 /* load out buffers and post the sends */ 2298 while ((iptr = *msg_nodes++)) 2299 { 2300 dptr3 = dptr2; 2301 while (*iptr >= 0) 2302 {*dptr2++ = *(dptr1 + *iptr++);} 2303 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2304 /* is msg_ids_out++ correct? */ 2305 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2306 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2307 } 2308 2309 if (gs->max_left_over) 2310 {gs_gop_tree_min_abs(gs,in_vals);} 2311 2312 /* process the received data */ 2313 msg_nodes=nodes; 2314 while ((iptr = *nodes++)) 2315 { 2316 /* Should I check the return value of MPI_Wait() or status? */ 2317 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2318 MPI_Wait(ids_in++, &status); 2319 while (*iptr >= 0) 2320 {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2321 } 2322 2323 /* replace vals */ 2324 while (*pw >= 0) 2325 {*(in_vals + *pw++) = *dptr1++;} 2326 2327 /* clear isend message handles */ 2328 /* This changed for clarity though it could be the same */ 2329 while (*msg_nodes++) 2330 /* Should I check the return value of MPI_Wait() or status? */ 2331 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2332 {MPI_Wait(ids_out++, &status);} 2333 } 2334 2335 2336 2337 /****************************************************************************** 2338 Function: gather_scatter 2339 2340 Input : 2341 Output: 2342 Return: 2343 Description: 2344 ******************************************************************************/ 2345 static 2346 void 2347 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals) 2348 { 2349 int size; 2350 int *in, *out; 2351 PetscScalar *buf, *work; 2352 int op[] = {GL_MIN_ABS,0}; 2353 2354 2355 in = gs->tree_map_in; 2356 out = gs->tree_map_out; 2357 buf = gs->tree_buf; 2358 work = gs->tree_work; 2359 size = gs->tree_nel; 2360 2361 rvec_set(buf,REAL_MAX,size); 2362 2363 while (*in >= 0) 2364 {*(buf + *out++) = *(vals + *in++);} 2365 2366 in = gs->tree_map_in; 2367 out = gs->tree_map_out; 2368 grop(buf,work,size,op); 2369 while (*in >= 0) 2370 {*(vals + *in++) = *(buf + *out++);} 2371 2372 } 2373 2374 2375 2376 /****************************************************************************** 2377 Function: gather_scatter 2378 2379 Input : 2380 Output: 2381 Return: 2382 Description: 2383 ******************************************************************************/ 2384 static void 2385 gs_gop_min( gs_id *gs, PetscScalar *vals) 2386 { 2387 2388 /* local only operations!!! */ 2389 if (gs->num_local) 2390 {gs_gop_local_min(gs,vals);} 2391 2392 /* if intersection tree/pairwise and local isn't empty */ 2393 if (gs->num_local_gop) 2394 { 2395 gs_gop_local_in_min(gs,vals); 2396 2397 /* pairwise */ 2398 if (gs->num_pairs) 2399 {gs_gop_pairwise_min(gs,vals);} 2400 2401 /* tree */ 2402 else if (gs->max_left_over) 2403 {gs_gop_tree_min(gs,vals);} 2404 2405 gs_gop_local_out(gs,vals); 2406 } 2407 /* if intersection tree/pairwise and local is empty */ 2408 else 2409 { 2410 /* pairwise */ 2411 if (gs->num_pairs) 2412 {gs_gop_pairwise_min(gs,vals);} 2413 2414 /* tree */ 2415 else if (gs->max_left_over) 2416 {gs_gop_tree_min(gs,vals);} 2417 } 2418 } 2419 2420 2421 2422 /****************************************************************************** 2423 Function: gather_scatter 2424 2425 Input : 2426 Output: 2427 Return: 2428 Description: 2429 ******************************************************************************/ 2430 static 2431 void 2432 gs_gop_local_min( gs_id *gs, PetscScalar *vals) 2433 { 2434 int *num, *map, **reduce; 2435 PetscScalar tmp; 2436 2437 num = gs->num_local_reduce; 2438 reduce = gs->local_reduce; 2439 while ((map = *reduce)) 2440 { 2441 num ++; 2442 tmp = REAL_MAX; 2443 while (*map >= 0) 2444 {tmp = PetscMin(tmp,*(vals + *map)); map++;} 2445 2446 map = *reduce++; 2447 while (*map >= 0) 2448 {*(vals + *map++) = tmp;} 2449 } 2450 } 2451 2452 2453 2454 /****************************************************************************** 2455 Function: gather_scatter 2456 2457 Input : 2458 Output: 2459 Return: 2460 Description: 2461 ******************************************************************************/ 2462 static 2463 void 2464 gs_gop_local_in_min( gs_id *gs, PetscScalar *vals) 2465 { 2466 int *num, *map, **reduce; 2467 PetscScalar *base; 2468 2469 num = gs->num_gop_local_reduce; 2470 reduce = gs->gop_local_reduce; 2471 while ((map = *reduce++)) 2472 { 2473 num++; 2474 base = vals + *map++; 2475 while (*map >= 0) 2476 {*base = PetscMin(*base,*(vals + *map)); map++;} 2477 } 2478 } 2479 2480 2481 2482 /****************************************************************************** 2483 Function: gather_scatter 2484 2485 VERSION 3 :: 2486 2487 Input : 2488 Output: 2489 Return: 2490 Description: 2491 ******************************************************************************/ 2492 static 2493 void 2494 gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals) 2495 { 2496 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2497 int *iptr, *msg_list, *msg_size, **msg_nodes; 2498 int *pw, *list, *size, **nodes; 2499 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2500 MPI_Status status; 2501 2502 2503 /* strip and load s */ 2504 msg_list =list = gs->pair_list; 2505 msg_size =size = gs->msg_sizes; 2506 msg_nodes=nodes = gs->node_list; 2507 iptr=pw = gs->pw_elm_list; 2508 dptr1=dptr3 = gs->pw_vals; 2509 msg_ids_in = ids_in = gs->msg_ids_in; 2510 msg_ids_out = ids_out = gs->msg_ids_out; 2511 dptr2 = gs->out; 2512 in1=in2 = gs->in; 2513 2514 /* post the receives */ 2515 /* msg_nodes=nodes; */ 2516 do 2517 { 2518 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2519 second one *list and do list++ afterwards */ 2520 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2521 gs->gs_comm, msg_ids_in++); 2522 in1 += *size++; 2523 } 2524 while (*++msg_nodes); 2525 msg_nodes=nodes; 2526 2527 /* load gs values into in out gs buffers */ 2528 while (*iptr >= 0) 2529 {*dptr3++ = *(in_vals + *iptr++);} 2530 2531 /* load out buffers and post the sends */ 2532 while ((iptr = *msg_nodes++)) 2533 { 2534 dptr3 = dptr2; 2535 while (*iptr >= 0) 2536 {*dptr2++ = *(dptr1 + *iptr++);} 2537 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2538 /* is msg_ids_out++ correct? */ 2539 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2540 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2541 } 2542 2543 /* process the received data */ 2544 if (gs->max_left_over) 2545 {gs_gop_tree_min(gs,in_vals);} 2546 2547 msg_nodes=nodes; 2548 while ((iptr = *nodes++)) 2549 { 2550 /* Should I check the return value of MPI_Wait() or status? */ 2551 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2552 MPI_Wait(ids_in++, &status); 2553 while (*iptr >= 0) 2554 {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;} 2555 } 2556 2557 /* replace vals */ 2558 while (*pw >= 0) 2559 {*(in_vals + *pw++) = *dptr1++;} 2560 2561 /* clear isend message handles */ 2562 /* This changed for clarity though it could be the same */ 2563 while (*msg_nodes++) 2564 /* Should I check the return value of MPI_Wait() or status? */ 2565 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2566 {MPI_Wait(ids_out++, &status);} 2567 } 2568 2569 2570 2571 /****************************************************************************** 2572 Function: gather_scatter 2573 2574 Input : 2575 Output: 2576 Return: 2577 Description: 2578 ******************************************************************************/ 2579 static 2580 void 2581 gs_gop_tree_min(gs_id *gs, PetscScalar *vals) 2582 { 2583 int size; 2584 int *in, *out; 2585 PetscScalar *buf, *work; 2586 2587 in = gs->tree_map_in; 2588 out = gs->tree_map_out; 2589 buf = gs->tree_buf; 2590 work = gs->tree_work; 2591 size = gs->tree_nel; 2592 2593 rvec_set(buf,REAL_MAX,size); 2594 2595 while (*in >= 0) 2596 {*(buf + *out++) = *(vals + *in++);} 2597 2598 in = gs->tree_map_in; 2599 out = gs->tree_map_out; 2600 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm); 2601 while (*in >= 0) 2602 {*(vals + *in++) = *(work + *out++);} 2603 } 2604 2605 2606 2607 /****************************************************************************** 2608 Function: gather_scatter 2609 2610 Input : 2611 Output: 2612 Return: 2613 Description: 2614 ******************************************************************************/ 2615 static void 2616 gs_gop_times( gs_id *gs, PetscScalar *vals) 2617 { 2618 2619 /* local only operations!!! */ 2620 if (gs->num_local) 2621 {gs_gop_local_times(gs,vals);} 2622 2623 /* if intersection tree/pairwise and local isn't empty */ 2624 if (gs->num_local_gop) 2625 { 2626 gs_gop_local_in_times(gs,vals); 2627 2628 /* pairwise */ 2629 if (gs->num_pairs) 2630 {gs_gop_pairwise_times(gs,vals);} 2631 2632 /* tree */ 2633 else if (gs->max_left_over) 2634 {gs_gop_tree_times(gs,vals);} 2635 2636 gs_gop_local_out(gs,vals); 2637 } 2638 /* if intersection tree/pairwise and local is empty */ 2639 else 2640 { 2641 /* pairwise */ 2642 if (gs->num_pairs) 2643 {gs_gop_pairwise_times(gs,vals);} 2644 2645 /* tree */ 2646 else if (gs->max_left_over) 2647 {gs_gop_tree_times(gs,vals);} 2648 } 2649 } 2650 2651 2652 2653 /****************************************************************************** 2654 Function: gather_scatter 2655 2656 Input : 2657 Output: 2658 Return: 2659 Description: 2660 ******************************************************************************/ 2661 static 2662 void 2663 gs_gop_local_times( gs_id *gs, PetscScalar *vals) 2664 { 2665 int *num, *map, **reduce; 2666 PetscScalar tmp; 2667 2668 num = gs->num_local_reduce; 2669 reduce = gs->local_reduce; 2670 while ((map = *reduce)) 2671 { 2672 /* wall */ 2673 if (*num == 2) 2674 { 2675 num ++; reduce++; 2676 vals[map[1]] = vals[map[0]] *= vals[map[1]]; 2677 } 2678 /* corner shared by three elements */ 2679 else if (*num == 3) 2680 { 2681 num ++; reduce++; 2682 vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]); 2683 } 2684 /* corner shared by four elements */ 2685 else if (*num == 4) 2686 { 2687 num ++; reduce++; 2688 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *= 2689 (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2690 } 2691 /* general case ... odd geoms ... 3D*/ 2692 else 2693 { 2694 num ++; 2695 tmp = 1.0; 2696 while (*map >= 0) 2697 {tmp *= *(vals + *map++);} 2698 2699 map = *reduce++; 2700 while (*map >= 0) 2701 {*(vals + *map++) = tmp;} 2702 } 2703 } 2704 } 2705 2706 2707 2708 /****************************************************************************** 2709 Function: gather_scatter 2710 2711 Input : 2712 Output: 2713 Return: 2714 Description: 2715 ******************************************************************************/ 2716 static 2717 void 2718 gs_gop_local_in_times( gs_id *gs, PetscScalar *vals) 2719 { 2720 int *num, *map, **reduce; 2721 PetscScalar *base; 2722 2723 num = gs->num_gop_local_reduce; 2724 reduce = gs->gop_local_reduce; 2725 while ((map = *reduce++)) 2726 { 2727 /* wall */ 2728 if (*num == 2) 2729 { 2730 num ++; 2731 vals[map[0]] *= vals[map[1]]; 2732 } 2733 /* corner shared by three elements */ 2734 else if (*num == 3) 2735 { 2736 num ++; 2737 vals[map[0]] *= (vals[map[1]] * vals[map[2]]); 2738 } 2739 /* corner shared by four elements */ 2740 else if (*num == 4) 2741 { 2742 num ++; 2743 vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]); 2744 } 2745 /* general case ... odd geoms ... 3D*/ 2746 else 2747 { 2748 num++; 2749 base = vals + *map++; 2750 while (*map >= 0) 2751 {*base *= *(vals + *map++);} 2752 } 2753 } 2754 } 2755 2756 2757 2758 /****************************************************************************** 2759 Function: gather_scatter 2760 2761 VERSION 3 :: 2762 2763 Input : 2764 Output: 2765 Return: 2766 Description: 2767 ******************************************************************************/ 2768 static 2769 void 2770 gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals) 2771 { 2772 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 2773 int *iptr, *msg_list, *msg_size, **msg_nodes; 2774 int *pw, *list, *size, **nodes; 2775 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 2776 MPI_Status status; 2777 2778 2779 /* strip and load s */ 2780 msg_list =list = gs->pair_list; 2781 msg_size =size = gs->msg_sizes; 2782 msg_nodes=nodes = gs->node_list; 2783 iptr=pw = gs->pw_elm_list; 2784 dptr1=dptr3 = gs->pw_vals; 2785 msg_ids_in = ids_in = gs->msg_ids_in; 2786 msg_ids_out = ids_out = gs->msg_ids_out; 2787 dptr2 = gs->out; 2788 in1=in2 = gs->in; 2789 2790 /* post the receives */ 2791 /* msg_nodes=nodes; */ 2792 do 2793 { 2794 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 2795 second one *list and do list++ afterwards */ 2796 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 2797 gs->gs_comm, msg_ids_in++); 2798 in1 += *size++; 2799 } 2800 while (*++msg_nodes); 2801 msg_nodes=nodes; 2802 2803 /* load gs values into in out gs buffers */ 2804 while (*iptr >= 0) 2805 {*dptr3++ = *(in_vals + *iptr++);} 2806 2807 /* load out buffers and post the sends */ 2808 while ((iptr = *msg_nodes++)) 2809 { 2810 dptr3 = dptr2; 2811 while (*iptr >= 0) 2812 {*dptr2++ = *(dptr1 + *iptr++);} 2813 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 2814 /* is msg_ids_out++ correct? */ 2815 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 2816 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 2817 } 2818 2819 if (gs->max_left_over) 2820 {gs_gop_tree_times(gs,in_vals);} 2821 2822 /* process the received data */ 2823 msg_nodes=nodes; 2824 while ((iptr = *nodes++)) 2825 { 2826 /* Should I check the return value of MPI_Wait() or status? */ 2827 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2828 MPI_Wait(ids_in++, &status); 2829 while (*iptr >= 0) 2830 {*(dptr1 + *iptr++) *= *in2++;} 2831 } 2832 2833 /* replace vals */ 2834 while (*pw >= 0) 2835 {*(in_vals + *pw++) = *dptr1++;} 2836 2837 /* clear isend message handles */ 2838 /* This changed for clarity though it could be the same */ 2839 while (*msg_nodes++) 2840 /* Should I check the return value of MPI_Wait() or status? */ 2841 /* Can this loop be replaced by a call to MPI_Waitall()? */ 2842 {MPI_Wait(ids_out++, &status);} 2843 } 2844 2845 2846 2847 /****************************************************************************** 2848 Function: gather_scatter 2849 2850 Input : 2851 Output: 2852 Return: 2853 Description: 2854 ******************************************************************************/ 2855 static 2856 void 2857 gs_gop_tree_times(gs_id *gs, PetscScalar *vals) 2858 { 2859 int size; 2860 int *in, *out; 2861 PetscScalar *buf, *work; 2862 2863 in = gs->tree_map_in; 2864 out = gs->tree_map_out; 2865 buf = gs->tree_buf; 2866 work = gs->tree_work; 2867 size = gs->tree_nel; 2868 2869 rvec_one(buf,size); 2870 2871 while (*in >= 0) 2872 {*(buf + *out++) = *(vals + *in++);} 2873 2874 in = gs->tree_map_in; 2875 out = gs->tree_map_out; 2876 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm); 2877 while (*in >= 0) 2878 {*(vals + *in++) = *(work + *out++);} 2879 2880 } 2881 2882 2883 2884 /****************************************************************************** 2885 Function: gather_scatter 2886 2887 2888 Input : 2889 Output: 2890 Return: 2891 Description: 2892 ******************************************************************************/ 2893 static void 2894 gs_gop_plus( gs_id *gs, PetscScalar *vals) 2895 { 2896 2897 /* local only operations!!! */ 2898 if (gs->num_local) 2899 {gs_gop_local_plus(gs,vals);} 2900 2901 /* if intersection tree/pairwise and local isn't empty */ 2902 if (gs->num_local_gop) 2903 { 2904 gs_gop_local_in_plus(gs,vals); 2905 2906 /* pairwise will NOT do tree inside ... */ 2907 if (gs->num_pairs) 2908 {gs_gop_pairwise_plus(gs,vals);} 2909 2910 /* tree */ 2911 if (gs->max_left_over) 2912 {gs_gop_tree_plus(gs,vals);} 2913 2914 gs_gop_local_out(gs,vals); 2915 } 2916 /* if intersection tree/pairwise and local is empty */ 2917 else 2918 { 2919 /* pairwise will NOT do tree inside */ 2920 if (gs->num_pairs) 2921 {gs_gop_pairwise_plus(gs,vals);} 2922 2923 /* tree */ 2924 if (gs->max_left_over) 2925 {gs_gop_tree_plus(gs,vals);} 2926 } 2927 2928 } 2929 2930 2931 2932 /****************************************************************************** 2933 Function: gather_scatter 2934 2935 Input : 2936 Output: 2937 Return: 2938 Description: 2939 ******************************************************************************/ 2940 static 2941 void 2942 gs_gop_local_plus( gs_id *gs, PetscScalar *vals) 2943 { 2944 int *num, *map, **reduce; 2945 PetscScalar tmp; 2946 2947 2948 num = gs->num_local_reduce; 2949 reduce = gs->local_reduce; 2950 while ((map = *reduce)) 2951 { 2952 /* wall */ 2953 if (*num == 2) 2954 { 2955 num ++; reduce++; 2956 vals[map[1]] = vals[map[0]] += vals[map[1]]; 2957 } 2958 /* corner shared by three elements */ 2959 else if (*num == 3) 2960 { 2961 num ++; reduce++; 2962 vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]); 2963 } 2964 /* corner shared by four elements */ 2965 else if (*num == 4) 2966 { 2967 num ++; reduce++; 2968 vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] += 2969 (vals[map[1]] + vals[map[2]] + vals[map[3]]); 2970 } 2971 /* general case ... odd geoms ... 3D*/ 2972 else 2973 { 2974 num ++; 2975 tmp = 0.0; 2976 while (*map >= 0) 2977 {tmp += *(vals + *map++);} 2978 2979 map = *reduce++; 2980 while (*map >= 0) 2981 {*(vals + *map++) = tmp;} 2982 } 2983 } 2984 } 2985 2986 2987 2988 /****************************************************************************** 2989 Function: gather_scatter 2990 2991 Input : 2992 Output: 2993 Return: 2994 Description: 2995 ******************************************************************************/ 2996 static 2997 void 2998 gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals) 2999 { 3000 int *num, *map, **reduce; 3001 PetscScalar *base; 3002 3003 3004 num = gs->num_gop_local_reduce; 3005 reduce = gs->gop_local_reduce; 3006 while ((map = *reduce++)) 3007 { 3008 /* wall */ 3009 if (*num == 2) 3010 { 3011 num ++; 3012 vals[map[0]] += vals[map[1]]; 3013 } 3014 /* corner shared by three elements */ 3015 else if (*num == 3) 3016 { 3017 num ++; 3018 vals[map[0]] += (vals[map[1]] + vals[map[2]]); 3019 } 3020 /* corner shared by four elements */ 3021 else if (*num == 4) 3022 { 3023 num ++; 3024 vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]); 3025 } 3026 /* general case ... odd geoms ... 3D*/ 3027 else 3028 { 3029 num++; 3030 base = vals + *map++; 3031 while (*map >= 0) 3032 {*base += *(vals + *map++);} 3033 } 3034 } 3035 } 3036 3037 3038 3039 /****************************************************************************** 3040 Function: gather_scatter 3041 3042 VERSION 3 :: 3043 3044 Input : 3045 Output: 3046 Return: 3047 Description: 3048 ******************************************************************************/ 3049 static 3050 void 3051 gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals) 3052 { 3053 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3054 int *iptr, *msg_list, *msg_size, **msg_nodes; 3055 int *pw, *list, *size, **nodes; 3056 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3057 MPI_Status status; 3058 3059 3060 /* strip and load s */ 3061 msg_list =list = gs->pair_list; 3062 msg_size =size = gs->msg_sizes; 3063 msg_nodes=nodes = gs->node_list; 3064 iptr=pw = gs->pw_elm_list; 3065 dptr1=dptr3 = gs->pw_vals; 3066 msg_ids_in = ids_in = gs->msg_ids_in; 3067 msg_ids_out = ids_out = gs->msg_ids_out; 3068 dptr2 = gs->out; 3069 in1=in2 = gs->in; 3070 3071 /* post the receives */ 3072 /* msg_nodes=nodes; */ 3073 do 3074 { 3075 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3076 second one *list and do list++ afterwards */ 3077 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3078 gs->gs_comm, msg_ids_in++); 3079 in1 += *size++; 3080 } 3081 while (*++msg_nodes); 3082 msg_nodes=nodes; 3083 3084 /* load gs values into in out gs buffers */ 3085 while (*iptr >= 0) 3086 {*dptr3++ = *(in_vals + *iptr++);} 3087 3088 /* load out buffers and post the sends */ 3089 while ((iptr = *msg_nodes++)) 3090 { 3091 dptr3 = dptr2; 3092 while (*iptr >= 0) 3093 {*dptr2++ = *(dptr1 + *iptr++);} 3094 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3095 /* is msg_ids_out++ correct? */ 3096 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++, 3097 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3098 } 3099 3100 /* do the tree while we're waiting */ 3101 if (gs->max_left_over) 3102 {gs_gop_tree_plus(gs,in_vals);} 3103 3104 /* process the received data */ 3105 msg_nodes=nodes; 3106 while ((iptr = *nodes++)) 3107 { 3108 /* Should I check the return value of MPI_Wait() or status? */ 3109 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3110 MPI_Wait(ids_in++, &status); 3111 while (*iptr >= 0) 3112 {*(dptr1 + *iptr++) += *in2++;} 3113 } 3114 3115 /* replace vals */ 3116 while (*pw >= 0) 3117 {*(in_vals + *pw++) = *dptr1++;} 3118 3119 /* clear isend message handles */ 3120 /* This changed for clarity though it could be the same */ 3121 while (*msg_nodes++) 3122 /* Should I check the return value of MPI_Wait() or status? */ 3123 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3124 {MPI_Wait(ids_out++, &status);} 3125 3126 } 3127 3128 3129 3130 /****************************************************************************** 3131 Function: gather_scatter 3132 3133 Input : 3134 Output: 3135 Return: 3136 Description: 3137 ******************************************************************************/ 3138 static 3139 void 3140 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals) 3141 { 3142 int size; 3143 int *in, *out; 3144 PetscScalar *buf, *work; 3145 3146 in = gs->tree_map_in; 3147 out = gs->tree_map_out; 3148 buf = gs->tree_buf; 3149 work = gs->tree_work; 3150 size = gs->tree_nel; 3151 3152 rvec_zero(buf,size); 3153 3154 while (*in >= 0) 3155 {*(buf + *out++) = *(vals + *in++);} 3156 3157 in = gs->tree_map_in; 3158 out = gs->tree_map_out; 3159 MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm); 3160 while (*in >= 0) 3161 {*(vals + *in++) = *(work + *out++);} 3162 3163 } 3164 3165 /****************************************************************************** 3166 Function: gs_free() 3167 3168 Input : 3169 3170 Output: 3171 3172 Return: 3173 3174 Description: 3175 if (gs->sss) {free((void*) gs->sss);} 3176 ******************************************************************************/ 3177 void 3178 gs_free( gs_id *gs) 3179 { 3180 int i; 3181 3182 3183 if (gs->nghs) {free((void*) gs->nghs);} 3184 if (gs->pw_nghs) {free((void*) gs->pw_nghs);} 3185 3186 /* tree */ 3187 if (gs->max_left_over) 3188 { 3189 if (gs->tree_elms) {free((void*) gs->tree_elms);} 3190 if (gs->tree_buf) {free((void*) gs->tree_buf);} 3191 if (gs->tree_work) {free((void*) gs->tree_work);} 3192 if (gs->tree_map_in) {free((void*) gs->tree_map_in);} 3193 if (gs->tree_map_out) {free((void*) gs->tree_map_out);} 3194 } 3195 3196 /* pairwise info */ 3197 if (gs->num_pairs) 3198 { 3199 /* should be NULL already */ 3200 if (gs->ngh_buf) {free((void*) gs->ngh_buf);} 3201 if (gs->elms) {free((void*) gs->elms);} 3202 if (gs->local_elms) {free((void*) gs->local_elms);} 3203 if (gs->companion) {free((void*) gs->companion);} 3204 3205 /* only set if pairwise */ 3206 if (gs->vals) {free((void*) gs->vals);} 3207 if (gs->in) {free((void*) gs->in);} 3208 if (gs->out) {free((void*) gs->out);} 3209 if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);} 3210 if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);} 3211 if (gs->pw_vals) {free((void*) gs->pw_vals);} 3212 if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);} 3213 if (gs->node_list) 3214 { 3215 for (i=0;i<gs->num_pairs;i++) 3216 {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}} 3217 free((void*) gs->node_list); 3218 } 3219 if (gs->msg_sizes) {free((void*) gs->msg_sizes);} 3220 if (gs->pair_list) {free((void*) gs->pair_list);} 3221 } 3222 3223 /* local info */ 3224 if (gs->num_local_total>=0) 3225 { 3226 for (i=0;i<gs->num_local_total+1;i++) 3227 /* for (i=0;i<gs->num_local_total;i++) */ 3228 { 3229 if (gs->num_gop_local_reduce[i]) 3230 {free((void*) gs->gop_local_reduce[i]);} 3231 } 3232 } 3233 3234 /* if intersection tree/pairwise and local isn't empty */ 3235 if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);} 3236 if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);} 3237 3238 free((void*) gs); 3239 } 3240 3241 3242 3243 3244 3245 3246 /****************************************************************************** 3247 Function: gather_scatter 3248 3249 Input : 3250 Output: 3251 Return: 3252 Description: 3253 ******************************************************************************/ 3254 void 3255 gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step) 3256 { 3257 3258 switch (*op) { 3259 case '+': 3260 gs_gop_vec_plus(gs,vals,step); 3261 break; 3262 default: 3263 error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]); 3264 error_msg_warning("gs_gop_vec() :: default :: plus"); 3265 gs_gop_vec_plus(gs,vals,step); 3266 break; 3267 } 3268 } 3269 3270 3271 3272 /****************************************************************************** 3273 Function: gather_scatter 3274 3275 Input : 3276 Output: 3277 Return: 3278 Description: 3279 ******************************************************************************/ 3280 static void 3281 gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step) 3282 { 3283 if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");} 3284 3285 /* local only operations!!! */ 3286 if (gs->num_local) 3287 {gs_gop_vec_local_plus(gs,vals,step);} 3288 3289 /* if intersection tree/pairwise and local isn't empty */ 3290 if (gs->num_local_gop) 3291 { 3292 gs_gop_vec_local_in_plus(gs,vals,step); 3293 3294 /* pairwise */ 3295 if (gs->num_pairs) 3296 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3297 3298 /* tree */ 3299 else if (gs->max_left_over) 3300 {gs_gop_vec_tree_plus(gs,vals,step);} 3301 3302 gs_gop_vec_local_out(gs,vals,step); 3303 } 3304 /* if intersection tree/pairwise and local is empty */ 3305 else 3306 { 3307 /* pairwise */ 3308 if (gs->num_pairs) 3309 {gs_gop_vec_pairwise_plus(gs,vals,step);} 3310 3311 /* tree */ 3312 else if (gs->max_left_over) 3313 {gs_gop_vec_tree_plus(gs,vals,step);} 3314 } 3315 } 3316 3317 3318 3319 /****************************************************************************** 3320 Function: gather_scatter 3321 3322 Input : 3323 Output: 3324 Return: 3325 Description: 3326 ******************************************************************************/ 3327 static 3328 void 3329 gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals, 3330 int step) 3331 { 3332 int *num, *map, **reduce; 3333 PetscScalar *base; 3334 3335 3336 num = gs->num_local_reduce; 3337 reduce = gs->local_reduce; 3338 while ((map = *reduce)) 3339 { 3340 base = vals + map[0] * step; 3341 3342 /* wall */ 3343 if (*num == 2) 3344 { 3345 num++; reduce++; 3346 rvec_add (base,vals+map[1]*step,step); 3347 rvec_copy(vals+map[1]*step,base,step); 3348 } 3349 /* corner shared by three elements */ 3350 else if (*num == 3) 3351 { 3352 num++; reduce++; 3353 rvec_add (base,vals+map[1]*step,step); 3354 rvec_add (base,vals+map[2]*step,step); 3355 rvec_copy(vals+map[2]*step,base,step); 3356 rvec_copy(vals+map[1]*step,base,step); 3357 } 3358 /* corner shared by four elements */ 3359 else if (*num == 4) 3360 { 3361 num++; reduce++; 3362 rvec_add (base,vals+map[1]*step,step); 3363 rvec_add (base,vals+map[2]*step,step); 3364 rvec_add (base,vals+map[3]*step,step); 3365 rvec_copy(vals+map[3]*step,base,step); 3366 rvec_copy(vals+map[2]*step,base,step); 3367 rvec_copy(vals+map[1]*step,base,step); 3368 } 3369 /* general case ... odd geoms ... 3D */ 3370 else 3371 { 3372 num++; 3373 while (*++map >= 0) 3374 {rvec_add (base,vals+*map*step,step);} 3375 3376 map = *reduce; 3377 while (*++map >= 0) 3378 {rvec_copy(vals+*map*step,base,step);} 3379 3380 reduce++; 3381 } 3382 } 3383 } 3384 3385 3386 3387 /****************************************************************************** 3388 Function: gather_scatter 3389 3390 Input : 3391 Output: 3392 Return: 3393 Description: 3394 ******************************************************************************/ 3395 static 3396 void 3397 gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals, 3398 int step) 3399 { 3400 int *num, *map, **reduce; 3401 PetscScalar *base; 3402 3403 num = gs->num_gop_local_reduce; 3404 reduce = gs->gop_local_reduce; 3405 while ((map = *reduce++)) 3406 { 3407 base = vals + map[0] * step; 3408 3409 /* wall */ 3410 if (*num == 2) 3411 { 3412 num ++; 3413 rvec_add(base,vals+map[1]*step,step); 3414 } 3415 /* corner shared by three elements */ 3416 else if (*num == 3) 3417 { 3418 num ++; 3419 rvec_add(base,vals+map[1]*step,step); 3420 rvec_add(base,vals+map[2]*step,step); 3421 } 3422 /* corner shared by four elements */ 3423 else if (*num == 4) 3424 { 3425 num ++; 3426 rvec_add(base,vals+map[1]*step,step); 3427 rvec_add(base,vals+map[2]*step,step); 3428 rvec_add(base,vals+map[3]*step,step); 3429 } 3430 /* general case ... odd geoms ... 3D*/ 3431 else 3432 { 3433 num++; 3434 while (*++map >= 0) 3435 {rvec_add(base,vals+*map*step,step);} 3436 } 3437 } 3438 } 3439 3440 3441 /****************************************************************************** 3442 Function: gather_scatter 3443 3444 Input : 3445 Output: 3446 Return: 3447 Description: 3448 ******************************************************************************/ 3449 static 3450 void 3451 gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals, 3452 int step) 3453 { 3454 int *num, *map, **reduce; 3455 PetscScalar *base; 3456 3457 3458 num = gs->num_gop_local_reduce; 3459 reduce = gs->gop_local_reduce; 3460 while ((map = *reduce++)) 3461 { 3462 base = vals + map[0] * step; 3463 3464 /* wall */ 3465 if (*num == 2) 3466 { 3467 num ++; 3468 rvec_copy(vals+map[1]*step,base,step); 3469 } 3470 /* corner shared by three elements */ 3471 else if (*num == 3) 3472 { 3473 num ++; 3474 rvec_copy(vals+map[1]*step,base,step); 3475 rvec_copy(vals+map[2]*step,base,step); 3476 } 3477 /* corner shared by four elements */ 3478 else if (*num == 4) 3479 { 3480 num ++; 3481 rvec_copy(vals+map[1]*step,base,step); 3482 rvec_copy(vals+map[2]*step,base,step); 3483 rvec_copy(vals+map[3]*step,base,step); 3484 } 3485 /* general case ... odd geoms ... 3D*/ 3486 else 3487 { 3488 num++; 3489 while (*++map >= 0) 3490 {rvec_copy(vals+*map*step,base,step);} 3491 } 3492 } 3493 } 3494 3495 3496 3497 /****************************************************************************** 3498 Function: gather_scatter 3499 3500 VERSION 3 :: 3501 3502 Input : 3503 Output: 3504 Return: 3505 Description: 3506 ******************************************************************************/ 3507 static 3508 void 3509 gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, 3510 int step) 3511 { 3512 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3513 int *iptr, *msg_list, *msg_size, **msg_nodes; 3514 int *pw, *list, *size, **nodes; 3515 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3516 MPI_Status status; 3517 PetscBLASInt i1; 3518 3519 3520 /* strip and load s */ 3521 msg_list =list = gs->pair_list; 3522 msg_size =size = gs->msg_sizes; 3523 msg_nodes=nodes = gs->node_list; 3524 iptr=pw = gs->pw_elm_list; 3525 dptr1=dptr3 = gs->pw_vals; 3526 msg_ids_in = ids_in = gs->msg_ids_in; 3527 msg_ids_out = ids_out = gs->msg_ids_out; 3528 dptr2 = gs->out; 3529 in1=in2 = gs->in; 3530 3531 /* post the receives */ 3532 /* msg_nodes=nodes; */ 3533 do 3534 { 3535 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3536 second one *list and do list++ afterwards */ 3537 MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3538 gs->gs_comm, msg_ids_in++); 3539 in1 += *size++ *step; 3540 } 3541 while (*++msg_nodes); 3542 msg_nodes=nodes; 3543 3544 /* load gs values into in out gs buffers */ 3545 while (*iptr >= 0) 3546 { 3547 rvec_copy(dptr3,in_vals + *iptr*step,step); 3548 dptr3+=step; 3549 iptr++; 3550 } 3551 3552 /* load out buffers and post the sends */ 3553 while ((iptr = *msg_nodes++)) 3554 { 3555 dptr3 = dptr2; 3556 while (*iptr >= 0) 3557 { 3558 rvec_copy(dptr2,dptr1 + *iptr*step,step); 3559 dptr2+=step; 3560 iptr++; 3561 } 3562 MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++, 3563 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3564 } 3565 3566 /* tree */ 3567 if (gs->max_left_over) 3568 {gs_gop_vec_tree_plus(gs,in_vals,step);} 3569 3570 /* process the received data */ 3571 msg_nodes=nodes; 3572 while ((iptr = *nodes++)){ 3573 PetscScalar d1 = 1.0; 3574 /* Should I check the return value of MPI_Wait() or status? */ 3575 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3576 MPI_Wait(ids_in++, &status); 3577 while (*iptr >= 0) { 3578 BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1); 3579 in2+=step; 3580 iptr++; 3581 } 3582 } 3583 3584 /* replace vals */ 3585 while (*pw >= 0) 3586 { 3587 rvec_copy(in_vals + *pw*step,dptr1,step); 3588 dptr1+=step; 3589 pw++; 3590 } 3591 3592 /* clear isend message handles */ 3593 /* This changed for clarity though it could be the same */ 3594 while (*msg_nodes++) 3595 /* Should I check the return value of MPI_Wait() or status? */ 3596 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3597 {MPI_Wait(ids_out++, &status);} 3598 3599 3600 } 3601 3602 3603 3604 /****************************************************************************** 3605 Function: gather_scatter 3606 3607 Input : 3608 Output: 3609 Return: 3610 Description: 3611 ******************************************************************************/ 3612 static 3613 void 3614 gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step) 3615 { 3616 int size, *in, *out; 3617 PetscScalar *buf, *work; 3618 int op[] = {GL_ADD,0}; 3619 PetscBLASInt i1 = 1; 3620 3621 3622 /* copy over to local variables */ 3623 in = gs->tree_map_in; 3624 out = gs->tree_map_out; 3625 buf = gs->tree_buf; 3626 work = gs->tree_work; 3627 size = gs->tree_nel*step; 3628 3629 /* zero out collection buffer */ 3630 rvec_zero(buf,size); 3631 3632 3633 /* copy over my contributions */ 3634 while (*in >= 0) 3635 { 3636 BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1); 3637 } 3638 3639 /* perform fan in/out on full buffer */ 3640 /* must change grop to handle the blas */ 3641 grop(buf,work,size,op); 3642 3643 /* reset */ 3644 in = gs->tree_map_in; 3645 out = gs->tree_map_out; 3646 3647 /* get the portion of the results I need */ 3648 while (*in >= 0) 3649 { 3650 BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1); 3651 } 3652 3653 } 3654 3655 3656 3657 /****************************************************************************** 3658 Function: gather_scatter 3659 3660 Input : 3661 Output: 3662 Return: 3663 Description: 3664 ******************************************************************************/ 3665 void 3666 gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim) 3667 { 3668 3669 switch (*op) { 3670 case '+': 3671 gs_gop_plus_hc(gs,vals,dim); 3672 break; 3673 default: 3674 error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]); 3675 error_msg_warning("gs_gop_hc() :: default :: plus\n"); 3676 gs_gop_plus_hc(gs,vals,dim); 3677 break; 3678 } 3679 } 3680 3681 3682 3683 /****************************************************************************** 3684 Function: gather_scatter 3685 3686 Input : 3687 Output: 3688 Return: 3689 Description: 3690 ******************************************************************************/ 3691 static void 3692 gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim) 3693 { 3694 /* if there's nothing to do return */ 3695 if (dim<=0) 3696 {return;} 3697 3698 /* can't do more dimensions then exist */ 3699 dim = PetscMin(dim,i_log2_num_nodes); 3700 3701 /* local only operations!!! */ 3702 if (gs->num_local) 3703 {gs_gop_local_plus(gs,vals);} 3704 3705 /* if intersection tree/pairwise and local isn't empty */ 3706 if (gs->num_local_gop) 3707 { 3708 gs_gop_local_in_plus(gs,vals); 3709 3710 /* pairwise will do tree inside ... */ 3711 if (gs->num_pairs) 3712 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3713 3714 /* tree only */ 3715 else if (gs->max_left_over) 3716 {gs_gop_tree_plus_hc(gs,vals,dim);} 3717 3718 gs_gop_local_out(gs,vals); 3719 } 3720 /* if intersection tree/pairwise and local is empty */ 3721 else 3722 { 3723 /* pairwise will do tree inside */ 3724 if (gs->num_pairs) 3725 {gs_gop_pairwise_plus_hc(gs,vals,dim);} 3726 3727 /* tree */ 3728 else if (gs->max_left_over) 3729 {gs_gop_tree_plus_hc(gs,vals,dim);} 3730 } 3731 3732 } 3733 3734 3735 /****************************************************************************** 3736 VERSION 3 :: 3737 3738 Input : 3739 Output: 3740 Return: 3741 Description: 3742 ******************************************************************************/ 3743 static 3744 void 3745 gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim) 3746 { 3747 PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; 3748 int *iptr, *msg_list, *msg_size, **msg_nodes; 3749 int *pw, *list, *size, **nodes; 3750 MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; 3751 MPI_Status status; 3752 int i, mask=1; 3753 3754 for (i=1; i<dim; i++) 3755 {mask<<=1; mask++;} 3756 3757 3758 /* strip and load s */ 3759 msg_list =list = gs->pair_list; 3760 msg_size =size = gs->msg_sizes; 3761 msg_nodes=nodes = gs->node_list; 3762 iptr=pw = gs->pw_elm_list; 3763 dptr1=dptr3 = gs->pw_vals; 3764 msg_ids_in = ids_in = gs->msg_ids_in; 3765 msg_ids_out = ids_out = gs->msg_ids_out; 3766 dptr2 = gs->out; 3767 in1=in2 = gs->in; 3768 3769 /* post the receives */ 3770 /* msg_nodes=nodes; */ 3771 do 3772 { 3773 /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the 3774 second one *list and do list++ afterwards */ 3775 if ((my_id|mask)==(*list|mask)) 3776 { 3777 MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++, 3778 gs->gs_comm, msg_ids_in++); 3779 in1 += *size++; 3780 } 3781 else 3782 {list++; size++;} 3783 } 3784 while (*++msg_nodes); 3785 3786 /* load gs values into in out gs buffers */ 3787 while (*iptr >= 0) 3788 {*dptr3++ = *(in_vals + *iptr++);} 3789 3790 /* load out buffers and post the sends */ 3791 msg_nodes=nodes; 3792 list = msg_list; 3793 while ((iptr = *msg_nodes++)) 3794 { 3795 if ((my_id|mask)==(*list|mask)) 3796 { 3797 dptr3 = dptr2; 3798 while (*iptr >= 0) 3799 {*dptr2++ = *(dptr1 + *iptr++);} 3800 /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */ 3801 /* is msg_ids_out++ correct? */ 3802 MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++, 3803 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++); 3804 } 3805 else 3806 {list++; msg_size++;} 3807 } 3808 3809 /* do the tree while we're waiting */ 3810 if (gs->max_left_over) 3811 {gs_gop_tree_plus_hc(gs,in_vals,dim);} 3812 3813 /* process the received data */ 3814 msg_nodes=nodes; 3815 list = msg_list; 3816 while ((iptr = *nodes++)) 3817 { 3818 if ((my_id|mask)==(*list|mask)) 3819 { 3820 /* Should I check the return value of MPI_Wait() or status? */ 3821 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3822 MPI_Wait(ids_in++, &status); 3823 while (*iptr >= 0) 3824 {*(dptr1 + *iptr++) += *in2++;} 3825 } 3826 list++; 3827 } 3828 3829 /* replace vals */ 3830 while (*pw >= 0) 3831 {*(in_vals + *pw++) = *dptr1++;} 3832 3833 /* clear isend message handles */ 3834 /* This changed for clarity though it could be the same */ 3835 while (*msg_nodes++) 3836 { 3837 if ((my_id|mask)==(*msg_list|mask)) 3838 { 3839 /* Should I check the return value of MPI_Wait() or status? */ 3840 /* Can this loop be replaced by a call to MPI_Waitall()? */ 3841 MPI_Wait(ids_out++, &status); 3842 } 3843 msg_list++; 3844 } 3845 3846 3847 } 3848 3849 3850 3851 /****************************************************************************** 3852 Function: gather_scatter 3853 3854 Input : 3855 Output: 3856 Return: 3857 Description: 3858 ******************************************************************************/ 3859 static 3860 void 3861 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim) 3862 { 3863 int size; 3864 int *in, *out; 3865 PetscScalar *buf, *work; 3866 int op[] = {GL_ADD,0}; 3867 3868 in = gs->tree_map_in; 3869 out = gs->tree_map_out; 3870 buf = gs->tree_buf; 3871 work = gs->tree_work; 3872 size = gs->tree_nel; 3873 3874 rvec_zero(buf,size); 3875 3876 while (*in >= 0) 3877 {*(buf + *out++) = *(vals + *in++);} 3878 3879 in = gs->tree_map_in; 3880 out = gs->tree_map_out; 3881 3882 grop_hc(buf,work,size,op,dim); 3883 3884 while (*in >= 0) 3885 {*(vals + *in++) = *(buf + *out++);} 3886 3887 } 3888 3889 3890 3891