#define PETSCKSP_DLL

/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/

/***********************************comm.c*************************************
File Description:
-----------------
Fan-in/fan-out (hypercube) global communication routines used by the TFS
solver: global integer/scalar reductions and staged segmented global adds.
***********************************comm.c*************************************/
#include "src/ksp/pc/impls/tfs/tfs.h"


/* global program control variables - explicitly exported */
int my_id            = 0;
int num_nodes        = 1;
int floor_num_nodes  = 0;
int i_log2_num_nodes = 0;

/* global program control variables */
static int p_init = 0;
static int modfl_num_nodes;
static int edge_not_pow_2;

static unsigned int edge_node[sizeof(PetscInt)*32];

/***********************************comm.c*************************************
Function: comm_init()

Input :
Output:
Return:
Description: set up my_id, num_nodes, and the hypercube edge information,
including the pairing used when num_nodes is not a power of 2
***********************************comm.c*************************************/
void
comm_init (void)
{

  if (p_init++) return;

#if PETSC_SIZEOF_INT != 4
  error_msg_warning("Int != 4 Bytes!");
#endif


  MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

  if (num_nodes > (INT_MAX >> 1))
    {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

  ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

  floor_num_nodes = 1;
  i_log2_num_nodes = modfl_num_nodes = 0;
  while (floor_num_nodes <= num_nodes)
    {
      edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
      floor_num_nodes <<= 1;
      i_log2_num_nodes++;
    }

  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = (num_nodes - floor_num_nodes);

  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = ((my_id|floor_num_nodes)-1);}
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = ((my_id^floor_num_nodes)+1);}
  else
    {edge_not_pow_2 = 0;}
}
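
/*
   Worked example (a sketch, not part of the build): with num_nodes = 6 the
   loop above yields floor_num_nodes = 4, i_log2_num_nodes = 2, and
   modfl_num_nodes = 2.  The two "excess" nodes 4 and 5 fold into nodes 1
   and 2 respectively:

     my_id          : 0  1  2  3  4  5
     edge_not_pow_2 : 0  4  5  0  1  2

   so nodes 1<->4 and 2<->5 exchange in the collapse/expand phases while the
   4-node hypercube {0,1,2,3} runs the fan-in/fan-out proper.
*/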


/***********************************comm.c*************************************
Function: giop()

Input : vals - integer array of length n (in/out), work - scratch array of
        length n, n - number of entries, oprs - operation code (or a list of
        n codes for NON_UNIFORM)
Output: vals holds the reduced result on every node
Return:
Description: global integer operation, fan-in/out version
***********************************comm.c*************************************/
void
giop(int *vals, int *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;
  int ierr;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* major league hack */
  if (!(fp = (vfp) ivec_fct_addr(type))) {
    error_msg_warning("giop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
    }
}
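
/*
   Minimal usage sketch (illustrative only, not part of the build): compute a
   global sum and a global max on every node.  GL_ADD and GL_MAX are assumed
   to be the operation codes understood by ivec_fct_addr() in this package.

     int vals[2], work[2];
     int op_add[] = {GL_ADD};
     int op_max[] = {GL_MAX};

     comm_init();
     vals[0] = my_id;  vals[1] = 1;
     giop(vals, work, 2, op_add);    -- vals[0] = sum of ranks, vals[1] = num_nodes
     vals[0] = my_id;
     giop(vals, work, 1, op_max);    -- vals[0] = num_nodes-1 on every node
*/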

/***********************************comm.c*************************************
Function: grop()

Input : vals - PetscScalar array of length n (in/out), work - scratch array,
        n - number of entries, oprs - operation code(s)
Output: vals holds the reduced result on every node
Return:
Description: global PetscScalar operation, fan-in/out version
***********************************comm.c*************************************/
void
grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;
  int ierr;


#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop() :: n=%D<0?",n);}

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,n,oprs);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, n, oprs);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
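
/*
   Usage sketch (illustrative only): a global dot product can be finished
   with grop() after the local part, assuming GL_ADD is the add operation
   code and local_dot holds this node's partial sum:

     PetscScalar dot = local_dot, work;
     int op_add[] = {GL_ADD};
     grop(&dot, &work, 1, op_add);   -- dot now holds the global sum
*/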


/***********************************comm.c*************************************
Function: grop_hc()

Input : vals, work, n, oprs as in grop(); dim - number of hypercube
        dimensions (edges) to reduce over
Output: vals holds the result reduced over the dim-dimensional sub-cube
Return:
Description: fan-in/out version restricted to the first dim dimensions

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;
  int ierr;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
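
/*
   Usage sketch (illustrative only): on 8 nodes (a 3-cube), calling

     grop_hc(vals, work, n, op_add, 2);

   reduces over hypercube dimensions 0 and 1 only, i.e. within the 4-node
   sub-cubes {0,1,2,3} and {4,5,6,7}; every node ends with its own sub-cube's
   result.  (op_add as in the grop() sketch above.)
*/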


/***********************************comm.c*************************************
Function: gfop()

Input : vals - data of MPI datatype dt, work - scratch, n - number of
        entries, fp - combining function, dt - MPI datatype, comm_type -
        communication strategy (MPI selects the MPI_Allreduce path)
Output: vals holds the reduced result on every node (hypercube path)
Return:
Description: fan-in/out version for a generic datatype/operation
***********************************comm.c*************************************/
void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
{
  int mask, edge;
  int dest;
  MPI_Status  status;
  MPI_Op op;
  int ierr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}

  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!fp)
    {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n))
    {return;}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("gfop() :: n=%D<0?",n);}

  if (comm_type==MPI)
    {
      ierr = MPI_Op_create(fp,TRUE,&op);
      ierr = MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
      ierr = MPI_Op_free(&op);
      return;
    }

  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
          (*fp)(vals,work,&n,&dt);
        }
    }

  /* implement the mesh fan in/out exchange algorithm */
  if (my_id<floor_num_nodes)
    {
      for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
        {
          dest = my_id^mask;
          if (my_id > dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
                       MPI_COMM_WORLD, &status);
              (*fp)(vals, work, &n, &dt);
            }
        }

      mask=floor_num_nodes>>1;
      for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
        {
          if (my_id%mask)
            {continue;}

          dest = my_id^mask;
          if (my_id < dest)
            {ierr = MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
          else
            {
              ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
                       MPI_COMM_WORLD, &status);
            }
        }
    }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2)
    {
      if (my_id >= floor_num_nodes)
        {
          ierr = MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
                   MPI_COMM_WORLD,&status);
        }
      else
        {ierr = MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
                  MPI_COMM_WORLD);}
    }
}
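
/*
   Usage sketch (illustrative only): a user-supplied combining function has
   the MPI_User_function shape.  Note MPI_Op_create() combines into its
   second argument, so on the comm_type==MPI path above the reduced data is
   left in work, not vals; the hypercube path leaves it in vals.

     static void my_sum(void *in, void *inout, int *len, MPI_Datatype *dtp)
     {
       int i;
       double *a = (double*) in, *b = (double*) inout;
       for (i=0; i<*len; i++) b[i] += a[i];
     }

     gfop(vals, work, n, (vbfp) my_sum, MPI_DOUBLE, MPI);
*/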



/******************************************************************************
Function: ssgl_radd()

Input : vals - scalar data, work - scratch, level - number of stages,
        segs - segment offsets into vals
Output: vals holds the staged global add
Return:
Description: staged scalar global add over segments (variable-length
fan-in/fan-out along the hypercube edges)

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void
ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level,
           int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status  status;
  int ierr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}
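
/*
   Usage sketch (illustrative only): segs has level+1 monotone offsets into
   vals; stage k operates on the tail vals[segs[k] .. segs[level]-1], so the
   portion below segs[k] is already final when stage k runs.  For example,
   with level = 2 and segs = {0, 4, 10}, stage 0 adds entries 0..9 across the
   first hypercube edge and stage 1 adds entries 4..9 across the second.
*/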


/***********************************comm.c*************************************
Function: grop_hc_vvl()

Input : vals, work, oprs, dim as in grop_hc(); segs - segment offsets giving
        the variable message length at each stage
Output: vals holds the staged result
Return:
Description: fan-in/out version with variable (segmented) message lengths

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void
grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
{
  int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status  status;
  int ierr;

  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

/******************************************************************************
Function: new_ssgl_radd()

Input : vals - scalar data, work - scratch, level - number of stages,
        segs - segment offsets into vals
Output: vals holds the staged global add
Return:
Description: staged scalar global add over segments; same algorithm as
ssgl_radd() above

ii+1 entries in seg :: 0 .. ii

******************************************************************************/
void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
{
  int edge, type, dest, mask;
  int stage_n;
  MPI_Status  status;
  int ierr;

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
            {ierr = MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
            }
        }
      mask <<= 1;
    }
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
            {ierr = MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type = type - my_id + dest;
              ierr = MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}


/***********************************comm.c*************************************
Function: giop_hc()

Input : vals, work, n, oprs as in giop(); dim - number of hypercube
        dimensions to reduce over
Output: vals holds the result reduced over the dim-dimensional sub-cube
Return:
Description: fan-in/out version restricted to the first dim dimensions

note good only for num_nodes=2^k!!!

***********************************comm.c*************************************/
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;
  int ierr;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions than exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {ierr = MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          ierr = MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}
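
/*
   Usage sketch (illustrative only): giop_hc() is the integer analogue of
   grop_hc().  On 4 nodes (a 2-cube), assuming GL_MAX is the max operation
   code:

     int v = my_id, w;
     int op_max[] = {GL_MAX};
     giop_hc(&v, &w, 1, op_max, 1);   -- v is now the max over the pairs
                                         {0,1} and {2,3}
*/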