#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/
#include "../src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
PetscMPIInt my_id            = 0;
PetscMPIInt num_nodes        = 1;
PetscMPIInt floor_num_nodes  = 0;
PetscMPIInt i_log2_num_nodes = 0;

/* global program control variables */
static PetscInt p_init = 0;
static PetscInt modfl_num_nodes;
static PetscInt edge_not_pow_2;

static PetscInt edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************/
PetscErrorCode comm_init (void)
{
  PetscFunctionBegin;
  if (p_init++) PetscFunctionReturn(0);

  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Can't have more than INT_MAX/2 nodes!!!");

  ivec_zero((PetscInt*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes  = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
  {
    edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
    floor_num_nodes <<= 1;
    i_log2_num_nodes++;
  }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
  {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
  {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
  {edge_not_pow_2 = 0;}
  PetscFunctionReturn(0);
}
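
/*
   Descriptive note (added commentary, not original to this file): comm_init()
   rounds num_nodes down to the largest power of 2 (floor_num_nodes =
   2^i_log2_num_nodes) and, when num_nodes is not a power of 2, pairs each
   "excess" rank (my_id >= floor_num_nodes) with a partner inside the
   power-of-2 core via edge_not_pow_2, so the excess ranks can be folded in
   before a reduction and fanned back out afterwards.  Worked example for
   num_nodes = 6: floor_num_nodes = 4, modfl_num_nodes = 2, and the pairs are
   1 <-> 4 and 2 <-> 5; ranks 0 and 3 get edge_not_pow_2 = 0 (no partner).
*/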

/***********************************comm.c*************************************/
PetscErrorCode giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
{
  PetscInt       mask, edge;
  PetscInt       type, dest;
  vfp            fp;
  MPI_Status     status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
  {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
  {
    PetscFunctionReturn(0);
  }

  /* a negative number of items to send ==> fatal */
  if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop() :: n=%D<0?",n);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
  {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    ierr = PetscInfo(0,"giop() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
  {
    if (my_id >= floor_num_nodes)
    {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    else
    {
      ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      (*fp)(vals,work,n,oprs);
    }
  }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
  {
    for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
      {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        (*fp)(vals, work, n, oprs);
      }
    }

    mask=floor_num_nodes>>1;
    for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
    {
      if (my_id%mask)
      {continue;}

      dest = my_id^mask;
      if (my_id < dest)
      {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      }
    }
  }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
  {
    if (my_id >= floor_num_nodes)
    {
      ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
    }
    else
    {ierr = MPI_Send(vals,n,MPIU_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
  }
  PetscFunctionReturn(0);
}
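
/*
   Illustrative usage sketch for giop() (added commentary, not part of the
   original file; GL_MAX is assumed to be the "max" opcode recognized by
   ivec_fct_addr() in tfs.h):

     PetscInt val  = my_id;         each rank contributes one entry
     PetscInt work;                 scratch buffer of the same length
     PetscInt op[] = {GL_MAX};
     giop(&val, &work, 1, op);

   after which val on every rank holds num_nodes-1.  With oprs[0] ==
   NON_UNIFORM, the n entries following it in oprs would instead supply one
   opcode per element of vals.
*/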

/***********************************comm.c*************************************/
PetscErrorCode grop(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs)
{
  PetscInt       mask, edge;
  PetscInt       type, dest;
  vfp            fp;
  MPI_Status     status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
  {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
  {PetscFunctionReturn(0);}

  /* a negative number of items to send ==> fatal */
  if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop() :: n=%D<0?",n);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
  {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    ierr = PetscInfo(0,"grop() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
  {
    if (my_id >= floor_num_nodes)
    {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    else
    {
      ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      (*fp)(vals,work,n,oprs);
    }
  }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
  {
    for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
      {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        (*fp)(vals, work, n, oprs);
      }
    }

    mask=floor_num_nodes>>1;
    for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
    {
      if (my_id%mask)
      {continue;}

      dest = my_id^mask;
      if (my_id < dest)
      {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      }
    }
  }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
  {
    if (my_id >= floor_num_nodes)
    {
      ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
    }
    else
    {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
  }
  PetscFunctionReturn(0);
}
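
/*
   Illustrative usage sketch for grop() (added commentary, not part of the
   original file; GL_ADD is assumed to be the "+" opcode recognized by
   rvec_fct_addr() in tfs.h):

     PetscScalar vals[2] = {1.0, 2.0};
     PetscScalar work[2];
     PetscInt    op[]    = {GL_ADD};
     grop(vals, work, 2, op);

   leaving the element-wise sum over all ranks in vals on every rank, i.e.
   vals[0] = num_nodes*1.0 and vals[1] = num_nodes*2.0 for this input.
*/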

/***********************************comm.c*************************************/
PetscErrorCode grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt       mask, edge;
  PetscInt       type, dest;
  vfp            fp;
  MPI_Status     status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
  {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
  {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: num_nodes not a power of 2!?!");

  /* a negative number of items to send ==> fatal */
  if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"grop_hc() :: n=%D<0?",n);

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
  {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    ierr = PetscInfo(0,"grop_hc() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
  {
    dest = my_id^mask;
    if (my_id > dest)
    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    else
    {
      ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      (*fp)(vals, work, n, oprs);
    }
  }

  if (edge==dim)
  {mask>>=1;}
  else
  {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
  {
    if (my_id%mask)
    {continue;}

    dest = my_id^mask;
    if (my_id < dest)
    {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    else
    {
      ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
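
/*
   Note on grop_hc() above (added commentary, not part of the original file):
   restricting the reduction to the low `dim' hypercube dimensions combines
   vals only among ranks that share all id bits above bit dim-1.  For example,
   on 8 ranks grop_hc(vals, work, n, op, 2) reduces within the two 4-rank
   subcubes {0,1,2,3} and {4,5,6,7}, while dim = i_log2_num_nodes reproduces
   a full grop() (num_nodes must be a power of 2 here).
*/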

/******************************************************************************/
PetscErrorCode ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt       edge, type, dest, mask;
  PetscInt       stage_n;
  MPI_Status     status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
  {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
  {
    stage_n = (segs[level] - segs[edge]);
    if (stage_n && !(my_id & mask))
    {
      dest = edge_node[edge];
      type = MSGTAG3 + my_id + (num_nodes*edge);
      if (my_id>dest)
      {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        type = type - my_id + dest;
        ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        rvec_add(vals+segs[edge], work, stage_n);
      }
    }
    mask <<= 1;
  }
  mask>>=1;
  for (edge=0; edge<level; edge++)
  {
    stage_n = (segs[level] - segs[level-1-edge]);
    if (stage_n && !(my_id & mask))
    {
      dest = edge_node[level-edge-1];
      type = MSGTAG6 + my_id + (num_nodes*edge);
      if (my_id<dest)
      {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        type = type - my_id + dest;
        ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      }
    }
    mask >>= 1;
  }
  PetscFunctionReturn(0);
}

/******************************************************************************/
PetscErrorCode new_ssgl_radd( PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt       edge, type, dest, mask;
  PetscInt       stage_n;
  MPI_Status     status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init)
  {comm_init();}

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
  {
    stage_n = (segs[level] - segs[edge]);
    if (stage_n && !(my_id & mask))
    {
      dest = edge_node[edge];
      type = MSGTAG3 + my_id + (num_nodes*edge);
      if (my_id>dest)
      {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        type = type - my_id + dest;
        ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
        rvec_add(vals+segs[edge], work, stage_n);
      }
    }
    mask <<= 1;
  }
  mask>>=1;
  for (edge=0; edge<level; edge++)
  {
    stage_n = (segs[level] - segs[level-1-edge]);
    if (stage_n && !(my_id & mask))
    {
      dest = edge_node[level-edge-1];
      type = MSGTAG6 + my_id + (num_nodes*edge);
      if (my_id<dest)
      {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,MPI_COMM_WORLD);CHKERRQ(ierr);}
      else
      {
        type = type - my_id + dest;
        ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      }
    }
    mask >>= 1;
  }
  PetscFunctionReturn(0);
}
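
/*
   Note on ssgl_radd()/new_ssgl_radd() above (added commentary, not part of
   the original file; the two routines are currently identical): segs[] holds
   level+1 offsets into vals, and at fan-in stage `edge' only the tail
   vals[segs[edge]] .. vals[segs[level]-1] is summed with the partner
   edge_node[edge] on the ranks still active at that stage, so progressively
   smaller pieces travel as the fan-in proceeds; the fan-out then returns the
   accumulated tails in the reverse order.  Hypothetical layout sketch: with
   level = 2 and segs[] = {0, 4, 6}, stage 0 sums vals[0..5] across bit 0,
   stage 1 sums vals[4..5] across bit 1, and the fan-out redistributes
   vals[4..5] and then vals[0..5].
*/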

/***********************************comm.c*************************************/
PetscErrorCode giop_hc(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt       mask, edge;
  PetscInt       type, dest;
  vfp            fp;
  MPI_Status     status;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init)
  {comm_init();}

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
  {PetscFunctionReturn(0);}

  /* the error msg says it all!!! */
  if (modfl_num_nodes) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: num_nodes not a power of 2!?!");

  /* a negative number of items to send ==> fatal */
  if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"giop_hc() :: n=%D<0?",n);

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
  {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))) {
    ierr = PetscInfo(0,"giop_hc() :: hope you passed in a rbfp!\n");CHKERRQ(ierr);
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
  {
    dest = my_id^mask;
    if (my_id > dest)
    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    else
    {
      ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
      (*fp)(vals, work, n, oprs);
    }
  }

  if (edge==dim)
  {mask>>=1;}
  else
  {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
  {
    if (my_id%mask)
    {continue;}

    dest = my_id^mask;
    if (my_id < dest)
    {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);CHKERRQ(ierr);}
    else
    {
      ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,&status);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
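
/*
   Illustrative usage sketch for giop_hc() (added commentary, not part of the
   original file; GL_ADD is assumed to be the "+" opcode recognized by
   ivec_fct_addr(), and n_local is a hypothetical per-rank count used only
   for this sketch):

     PetscInt count = n_local;
     PetscInt work;
     PetscInt op[]  = {GL_ADD};
     giop_hc(&count, &work, 1, op, 2);

   which, on a power-of-2 number of ranks, leaves in count the sum of n_local
   over the 4-rank subcube of ranks sharing all id bits above the low two.
*/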