#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
PetscMPIInt my_id            = 0;
PetscMPIInt num_nodes        = 1;
PetscMPIInt floor_num_nodes  = 0;
PetscMPIInt i_log2_num_nodes = 0;

/* global program control variables */
static PetscInt p_init = 0;
static PetscInt modfl_num_nodes;
static PetscInt edge_not_pow_2;

static PetscInt edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************/
PetscErrorCode comm_init (void)
{
  PetscFunctionBegin;
  if (p_init++) PetscFunctionReturn(0);

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {SETERRQ(PETSC_ERR_PLIB,"Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);

  /* find the largest power of two <= num_nodes and record, for each
     hypercube dimension, the neighbor reached by flipping that bit */
  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  /* nodes above the power-of-two floor pair off with low-numbered nodes */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
  PetscFunctionReturn(0);
}
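/* Worked example (illustration only, not part of the library): with
   num_nodes = 6 the loop above yields floor_num_nodes = 4,
   i_log2_num_nodes = 2, and modfl_num_nodes = 2.  The two nodes above the
   power-of-two floor pair with low-numbered partners via edge_not_pow_2:

       my_id          : 0  1  2  3  4  5
       edge_not_pow_2 : 0  4  5  0  1  2

   so node 4 collapses into node 1 and node 5 into node 2 before the
   hypercube exchange, and the results fan back out to them afterwards. */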
/***********************************comm.c*************************************/
PetscErrorCode giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {SETERRQ3(PETSC_ERR_PLIB,"giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {SETERRQ(PETSC_ERR_PLIB,"giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_ERR_PLIB,"giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack: if the operator token isn't recognized, assume the
     caller passed a function pointer in oprs */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    ierr = PetscInfo(0,"giop() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan in: accumulate along each hypercube dimension in turn */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals,work,n,oprs);
            }
        }

      /* fan out: broadcast the result back down the dimensions */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
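/* Usage sketch (hypothetical caller, not part of this file): sum one
   integer across all ranks.  GL_ADD is assumed to be one of the operator
   tokens recognized by ivec_fct_addr() in tfs.h.

     PetscInt val[1], work[1], op[1] = {GL_ADD};
     val[0] = my_id;
     giop(val,work,1,op);   -- on return val[0] holds the sum over all ranks
*/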
/***********************************comm.c*************************************/
PetscErrorCode grop(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {SETERRQ3(PETSC_ERR_PLIB,"grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {SETERRQ(PETSC_ERR_PLIB,"grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_ERR_PLIB,"grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    ierr = PetscInfo(0,"grop() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      /* fan in: accumulate along each hypercube dimension in turn */
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              (*fp)(vals,work,n,oprs);
            }
        }

      /* fan out: broadcast the result back down the dimensions */
      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
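/* NON_UNIFORM sketch (hypothetical caller): when oprs[0] == NON_UNIFORM the
   entries that follow are assumed to select one operator token per element
   of vals, e.g.

     PetscScalar vals[2], work[2];
     PetscInt    op[3] = {NON_UNIFORM, GL_MIN, GL_MAX};
     grop(vals,work,2,op);   -- global min of vals[0], global max of vals[1]

   GL_MIN and GL_MAX are assumed to be tokens recognized by
   rvec_fct_addr() in tfs.h. */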
/***********************************comm.c*************************************/
PetscErrorCode grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {SETERRQ3(PETSC_ERR_PLIB,"grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {SETERRQ(PETSC_ERR_PLIB,"grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {SETERRQ(PETSC_ERR_PLIB,"grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_ERR_PLIB,"grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    ierr = PetscInfo(0,"grop_hc() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* fan in over the first dim hypercube dimensions */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* position mask at the top exchanged dimension for the fan out */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
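/* Note (illustration only): grop_hc() reduces over only the first dim
   hypercube dimensions, so ranks that share their upper
   (i_log2_num_nodes - dim) address bits form independent reduction groups.
   For example, with 8 ranks and dim = 2 the groups are {0,1,2,3} and
   {4,5,6,7}; each group gets its own reduced result. */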
/******************************************************************************/
PetscErrorCode ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt edge, type, dest, mask;
  PetscInt stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}

/******************************************************************************/
PetscErrorCode new_ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt edge, type, dest, mask;
  PetscInt stage_n;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
            }
        }
      mask >>= 1;
    }
  PetscFunctionReturn(0);
}
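/* Staged-segment note (illustration only): segs[] is a monotone list of
   offsets into vals with level+1 entries.  At fan-in stage `edge` only the
   tail vals[segs[edge]] .. vals[segs[level]-1] is exchanged and summed
   (stage_n = segs[level]-segs[edge]), so the messages shrink as the fan-in
   climbs the tree and grow again during the fan-out.  The mask pattern
   0,1,3,7,... means only ranks whose low `edge` address bits are zero
   participate at stage `edge`. */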
/***********************************comm.c*************************************/
PetscErrorCode giop_hc(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt mask, edge;
  PetscInt type, dest;
  vfp fp;
  MPI_Status status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {SETERRQ3(PETSC_ERR_PLIB,"giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {SETERRQ(PETSC_ERR_PLIB,"giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {SETERRQ(PETSC_ERR_PLIB,"giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {SETERRQ1(PETSC_ERR_PLIB,"giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))) {
    ierr = PetscInfo(0,"giop_hc() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* fan in over the first dim hypercube dimensions */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* position mask at the top exchanged dimension for the fan out */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
        {ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);}
    }
  PetscFunctionReturn(0);
}
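/* Usage sketch (hypothetical caller, not part of this file): take a max
   over 2-dimensional subcubes only; requires num_nodes to be a power of 2.
   GL_MAX is assumed to be a token recognized by ivec_fct_addr() in tfs.h.

     PetscInt v[1], w[1], op[1] = {GL_MAX};
     v[0] = my_id;
     giop_hc(v,w,1,op,2);   -- v[0] becomes the max within each group of 4
*/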