xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 8e27ec22c2bf3ca2ab9510c404d8602b09c0b2df)
1 #define PETSCKSP_DLL
2 
3 /***********************************comm.c*************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 11.21.97
16 ***********************************comm.c*************************************/
17 
18 /***********************************comm.c*************************************
19 File Description:
20 -----------------
21 
22 ***********************************comm.c*************************************/
23 #include "src/ksp/pc/impls/tfs/tfs.h"
24 
25 
26 /* global program control variables - explicitly exported */
27 int my_id            = 0;
28 int num_nodes        = 1;
29 int floor_num_nodes  = 0;
30 int i_log2_num_nodes = 0;
31 
32 /* global program control variables */
33 static int p_init = 0;
34 static int modfl_num_nodes;
35 static int edge_not_pow_2;
36 
37 static unsigned int edge_node[sizeof(PetscInt)*32];
38 
39 /***********************************comm.c*************************************
40 Function: comm_init()
41 
42 Input :
43 Output:
44 Return:
45 Description:
46 ***********************************comm.c*************************************/
47 void
48 comm_init (void)
49 {
50 
51   if (p_init++) return;
52 
53 #if PETSC_SIZEOF_INT != 4
54   error_msg_warning("Int != 4 Bytes!");
55 #endif
56 
57 
58   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
59   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
60 
61   if (num_nodes> (INT_MAX >> 1))
62   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
63 
64   ivec_zero((int*)edge_node,sizeof(PetscInt)*32);
65 
66   floor_num_nodes = 1;
67   i_log2_num_nodes = modfl_num_nodes = 0;
68   while (floor_num_nodes <= num_nodes)
69     {
70       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
71       floor_num_nodes <<= 1;
72       i_log2_num_nodes++;
73     }
74 
75   i_log2_num_nodes--;
76   floor_num_nodes >>= 1;
77   modfl_num_nodes = (num_nodes - floor_num_nodes);
78 
79   if ((my_id > 0) && (my_id <= modfl_num_nodes))
80     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
81   else if (my_id >= floor_num_nodes)
82     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
83     }
84   else
85     {edge_not_pow_2 = 0;}
86 
87 }
88 
89 
90 
91 /***********************************comm.c*************************************
92 Function: giop()
93 
94 Input :
95 Output:
96 Return:
97 Description: fan-in/out version
98 ***********************************comm.c*************************************/
99 void
100 giop(int *vals, int *work, int n, int *oprs)
101 {
102    int mask, edge;
103   int type, dest;
104   vfp fp;
105   MPI_Status  status;
106 
107 
108 #ifdef SAFE
109   /* ok ... should have some data, work, and operator(s) */
110   if (!vals||!work||!oprs)
111     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
112 
113   /* non-uniform should have at least two entries */
114   if ((oprs[0] == NON_UNIFORM)&&(n<2))
115     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
116 
117   /* check to make sure comm package has been initialized */
118   if (!p_init)
119     {comm_init();}
120 #endif
121 
122   /* if there's nothing to do return */
123   if ((num_nodes<2)||(!n))
124     {
125       return;
126     }
127 
128   /* a negative number if items to send ==> fatal */
129   if (n<0)
130     {error_msg_fatal("giop() :: n=%D<0?",n);}
131 
132   /* advance to list of n operations for custom */
133   if ((type=oprs[0])==NON_UNIFORM)
134     {oprs++;}
135 
136   /* major league hack */
137   if (!(fp = (vfp) ivec_fct_addr(type))) {
138     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
139     fp = (vfp) oprs;
140   }
141 
142   /* all msgs will be of the same length */
143   /* if not a hypercube must colapse partial dim */
144   if (edge_not_pow_2)
145     {
146       if (my_id >= floor_num_nodes)
147 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
148       else
149 	{
150 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
151 		   MPI_COMM_WORLD,&status);
152 	  (*fp)(vals,work,n,oprs);
153 	}
154     }
155 
156   /* implement the mesh fan in/out exchange algorithm */
157   if (my_id<floor_num_nodes)
158     {
159       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
160 	{
161 	  dest = my_id^mask;
162 	  if (my_id > dest)
163 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
164 	  else
165 	    {
166 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
167 		       MPI_COMM_WORLD, &status);
168 	      (*fp)(vals, work, n, oprs);
169 	    }
170 	}
171 
172       mask=floor_num_nodes>>1;
173       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
174 	{
175 	  if (my_id%mask)
176 	    {continue;}
177 
178 	  dest = my_id^mask;
179 	  if (my_id < dest)
180 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
181 	  else
182 	    {
183 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
184 		       MPI_COMM_WORLD, &status);
185 	    }
186 	}
187     }
188 
189   /* if not a hypercube must expand to partial dim */
190   if (edge_not_pow_2)
191     {
192       if (my_id >= floor_num_nodes)
193 	{
194 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
195 		   MPI_COMM_WORLD,&status);
196 	}
197       else
198 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
199     }
200 }
201 
202 /***********************************comm.c*************************************
203 Function: grop()
204 
205 Input :
206 Output:
207 Return:
208 Description: fan-in/out version
209 ***********************************comm.c*************************************/
210 void
211 grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
212 {
213    int mask, edge;
214   int type, dest;
215   vfp fp;
216   MPI_Status  status;
217 
218 
219 #ifdef SAFE
220   /* ok ... should have some data, work, and operator(s) */
221   if (!vals||!work||!oprs)
222     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
223 
224   /* non-uniform should have at least two entries */
225   if ((oprs[0] == NON_UNIFORM)&&(n<2))
226     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
227 
228   /* check to make sure comm package has been initialized */
229   if (!p_init)
230     {comm_init();}
231 #endif
232 
233   /* if there's nothing to do return */
234   if ((num_nodes<2)||(!n))
235     {return;}
236 
237   /* a negative number of items to send ==> fatal */
238   if (n<0)
239     {error_msg_fatal("gdop() :: n=%D<0?",n);}
240 
241   /* advance to list of n operations for custom */
242   if ((type=oprs[0])==NON_UNIFORM)
243     {oprs++;}
244 
245   if (!(fp = (vfp) rvec_fct_addr(type))) {
246     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
247     fp = (vfp) oprs;
248   }
249 
250   /* all msgs will be of the same length */
251   /* if not a hypercube must colapse partial dim */
252   if (edge_not_pow_2)
253     {
254       if (my_id >= floor_num_nodes)
255 	{MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
256 		  MPI_COMM_WORLD);}
257       else
258 	{
259 	  MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
260 		   MPI_COMM_WORLD,&status);
261 	  (*fp)(vals,work,n,oprs);
262 	}
263     }
264 
265   /* implement the mesh fan in/out exchange algorithm */
266   if (my_id<floor_num_nodes)
267     {
268       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
269 	{
270 	  dest = my_id^mask;
271 	  if (my_id > dest)
272 	    {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
273 	  else
274 	    {
275 	      MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
276 		       MPI_COMM_WORLD, &status);
277 	      (*fp)(vals, work, n, oprs);
278 	    }
279 	}
280 
281       mask=floor_num_nodes>>1;
282       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
283 	{
284 	  if (my_id%mask)
285 	    {continue;}
286 
287 	  dest = my_id^mask;
288 	  if (my_id < dest)
289 	    {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
290 	  else
291 	    {
292 	      MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
293 		       MPI_COMM_WORLD, &status);
294 	    }
295 	}
296     }
297 
298   /* if not a hypercube must expand to partial dim */
299   if (edge_not_pow_2)
300     {
301       if (my_id >= floor_num_nodes)
302 	{
303 	  MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
304 		   MPI_COMM_WORLD,&status);
305 	}
306       else
307 	{MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
308 		  MPI_COMM_WORLD);}
309     }
310 }
311 
312 
313 /***********************************comm.c*************************************
314 Function: grop_hc()
315 
316 Input :
317 Output:
318 Return:
319 Description: fan-in/out version
320 
321 note good only for num_nodes=2^k!!!
322 
323 ***********************************comm.c*************************************/
324 void
325 grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
326 {
327    int mask, edge;
328   int type, dest;
329   vfp fp;
330   MPI_Status  status;
331 
332 #ifdef SAFE
333   /* ok ... should have some data, work, and operator(s) */
334   if (!vals||!work||!oprs)
335     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
336 
337   /* non-uniform should have at least two entries */
338   if ((oprs[0] == NON_UNIFORM)&&(n<2))
339     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
340 
341   /* check to make sure comm package has been initialized */
342   if (!p_init)
343     {comm_init();}
344 #endif
345 
346   /* if there's nothing to do return */
347   if ((num_nodes<2)||(!n)||(dim<=0))
348     {return;}
349 
350   /* the error msg says it all!!! */
351   if (modfl_num_nodes)
352     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
353 
354   /* a negative number of items to send ==> fatal */
355   if (n<0)
356     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}
357 
358   /* can't do more dimensions then exist */
359   dim = PetscMin(dim,i_log2_num_nodes);
360 
361   /* advance to list of n operations for custom */
362   if ((type=oprs[0])==NON_UNIFORM)
363     {oprs++;}
364 
365   if (!(fp = (vfp) rvec_fct_addr(type))) {
366     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
367     fp = (vfp) oprs;
368   }
369 
370   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
371     {
372       dest = my_id^mask;
373       if (my_id > dest)
374 	{MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
375       else
376 	{
377 	  MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
378 		   &status);
379 	  (*fp)(vals, work, n, oprs);
380 	}
381     }
382 
383   if (edge==dim)
384     {mask>>=1;}
385   else
386     {while (++edge<dim) {mask<<=1;}}
387 
388   for (edge=0; edge<dim; edge++,mask>>=1)
389     {
390       if (my_id%mask)
391 	{continue;}
392 
393       dest = my_id^mask;
394       if (my_id < dest)
395 	{MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
396       else
397 	{
398 	  MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
399 		   &status);
400 	}
401     }
402 }
403 
404 
405 /***********************************comm.c*************************************
406 Function: gfop()
407 
408 Input :
409 Output:
410 Return:
411 Description: fan-in/out version
412 ***********************************comm.c*************************************/
413 void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
414 {
415    int mask, edge;
416   int dest;
417   MPI_Status  status;
418   MPI_Op op;
419 
420 #ifdef SAFE
421   /* check to make sure comm package has been initialized */
422   if (!p_init)
423     {comm_init();}
424 
425   /* ok ... should have some data, work, and operator(s) */
426   if (!vals||!work||!fp)
427     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
428 #endif
429 
430   /* if there's nothing to do return */
431   if ((num_nodes<2)||(!n))
432     {return;}
433 
434   /* a negative number of items to send ==> fatal */
435   if (n<0)
436     {error_msg_fatal("gop() :: n=%D<0?",n);}
437 
438   if (comm_type==MPI)
439     {
440       MPI_Op_create(fp,TRUE,&op);
441       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
442       MPI_Op_free(&op);
443       return;
444     }
445 
446   /* if not a hypercube must colapse partial dim */
447   if (edge_not_pow_2)
448     {
449       if (my_id >= floor_num_nodes)
450 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
451 		  MPI_COMM_WORLD);}
452       else
453 	{
454 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
455 		   MPI_COMM_WORLD,&status);
456 	  (*fp)(vals,work,&n,&dt);
457 	}
458     }
459 
460   /* implement the mesh fan in/out exchange algorithm */
461   if (my_id<floor_num_nodes)
462     {
463       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
464 	{
465 	  dest = my_id^mask;
466 	  if (my_id > dest)
467 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
468 	  else
469 	    {
470 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
471 		       MPI_COMM_WORLD, &status);
472 	      (*fp)(vals, work, &n, &dt);
473 	    }
474 	}
475 
476       mask=floor_num_nodes>>1;
477       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
478 	{
479 	  if (my_id%mask)
480 	    {continue;}
481 
482 	  dest = my_id^mask;
483 	  if (my_id < dest)
484 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
485 	  else
486 	    {
487 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
488 		       MPI_COMM_WORLD, &status);
489 	    }
490 	}
491     }
492 
493   /* if not a hypercube must expand to partial dim */
494   if (edge_not_pow_2)
495     {
496       if (my_id >= floor_num_nodes)
497 	{
498 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
499 		   MPI_COMM_WORLD,&status);
500 	}
501       else
502 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
503 		  MPI_COMM_WORLD);}
504     }
505 }
506 
507 
508 
509 
510 
511 
512 /******************************************************************************
513 Function: ssgl_radd()
514 
515 Input :
516 Output:
517 Return:
518 Description:
519 
520 ii+1 entries in seg :: 0 .. ii
521 
522 ******************************************************************************/
523 void
524 ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level,
525 	   int *segs)
526 {
527    int edge, type, dest, mask;
528    int stage_n;
529   MPI_Status  status;
530 
531 #ifdef SAFE
532   /* check to make sure comm package has been initialized */
533   if (!p_init)
534     {comm_init();}
535 #endif
536 
537 
538   /* all msgs are *NOT* the same length */
539   /* implement the mesh fan in/out exchange algorithm */
540   for (mask=0, edge=0; edge<level; edge++, mask++)
541     {
542       stage_n = (segs[level] - segs[edge]);
543       if (stage_n && !(my_id & mask))
544 	{
545 	  dest = edge_node[edge];
546 	  type = MSGTAG3 + my_id + (num_nodes*edge);
547 	  if (my_id>dest)
548           {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
549                       MPI_COMM_WORLD);}
550 	  else
551 	    {
552 	      type =  type - my_id + dest;
553               MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
554                        MPI_COMM_WORLD,&status);
555 	      rvec_add(vals+segs[edge], work, stage_n);
556 	    }
557 	}
558       mask <<= 1;
559     }
560   mask>>=1;
561   for (edge=0; edge<level; edge++)
562     {
563       stage_n = (segs[level] - segs[level-1-edge]);
564       if (stage_n && !(my_id & mask))
565 	{
566 	  dest = edge_node[level-edge-1];
567 	  type = MSGTAG6 + my_id + (num_nodes*edge);
568 	  if (my_id<dest)
569             {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
570                       MPI_COMM_WORLD);}
571 	  else
572 	    {
573 	      type =  type - my_id + dest;
574               MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
575                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
576 	    }
577 	}
578       mask >>= 1;
579     }
580 }
581 
582 
583 
584 /***********************************comm.c*************************************
585 Function: grop_hc_vvl()
586 
587 Input :
588 Output:
589 Return:
590 Description: fan-in/out version
591 
592 note good only for num_nodes=2^k!!!
593 
594 ***********************************comm.c*************************************/
595 void
596 grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
597 {
598    int mask, edge, n;
599   int type, dest;
600   vfp fp;
601   MPI_Status  status;
602 
603   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
604 
605 #ifdef SAFE
606   /* ok ... should have some data, work, and operator(s) */
607   if (!vals||!work||!oprs||!segs)
608     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
609 
610   /* non-uniform should have at least two entries */
611 
612   /* check to make sure comm package has been initialized */
613   if (!p_init)
614     {comm_init();}
615 #endif
616 
617   /* if there's nothing to do return */
618   if ((num_nodes<2)||(dim<=0))
619     {return;}
620 
621   /* the error msg says it all!!! */
622   if (modfl_num_nodes)
623     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
624 
625   /* can't do more dimensions then exist */
626   dim = PetscMin(dim,i_log2_num_nodes);
627 
628   /* advance to list of n operations for custom */
629   if ((type=oprs[0])==NON_UNIFORM)
630     {oprs++;}
631 
632   if (!(fp = (vfp) rvec_fct_addr(type))){
633     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
634     fp = (vfp) oprs;
635   }
636 
637   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
638     {
639       n = segs[dim]-segs[edge];
640       dest = my_id^mask;
641       if (my_id > dest)
642 	{MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
643       else
644 	{
645 	  MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
646 		   MPI_COMM_WORLD, &status);
647 	  (*fp)(vals+segs[edge], work, n, oprs);
648 	}
649     }
650 
651   if (edge==dim)
652     {mask>>=1;}
653   else
654     {while (++edge<dim) {mask<<=1;}}
655 
656   for (edge=0; edge<dim; edge++,mask>>=1)
657     {
658       if (my_id%mask)
659 	{continue;}
660 
661       n = (segs[dim]-segs[dim-1-edge]);
662 
663       dest = my_id^mask;
664       if (my_id < dest)
665 	{MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
666 		  MPI_COMM_WORLD);}
667       else
668 	{
669 	  MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
670 		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
671 	}
672     }
673 }
674 
675 /******************************************************************************
676 Function: new_ssgl_radd()
677 
678 Input :
679 Output:
680 Return:
681 Description:
682 
683 ii+1 entries in seg :: 0 .. ii
684 
685 ******************************************************************************/
686 void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
687 {
688    int edge, type, dest, mask;
689    int stage_n;
690   MPI_Status  status;
691 
692 #ifdef SAFE
693   /* check to make sure comm package has been initialized */
694   if (!p_init)
695     {comm_init();}
696 #endif
697 
698   /* all msgs are *NOT* the same length */
699   /* implement the mesh fan in/out exchange algorithm */
700   for (mask=0, edge=0; edge<level; edge++, mask++)
701     {
702       stage_n = (segs[level] - segs[edge]);
703       if (stage_n && !(my_id & mask))
704 	{
705 	  dest = edge_node[edge];
706 	  type = MSGTAG3 + my_id + (num_nodes*edge);
707 	  if (my_id>dest)
708           {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
709                       MPI_COMM_WORLD);}
710 	  else
711 	    {
712 	      type =  type - my_id + dest;
713               MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
714                        MPI_COMM_WORLD,&status);
715 	      rvec_add(vals+segs[edge], work, stage_n);
716 	    }
717 	}
718       mask <<= 1;
719     }
720   mask>>=1;
721   for (edge=0; edge<level; edge++)
722     {
723       stage_n = (segs[level] - segs[level-1-edge]);
724       if (stage_n && !(my_id & mask))
725 	{
726 	  dest = edge_node[level-edge-1];
727 	  type = MSGTAG6 + my_id + (num_nodes*edge);
728 	  if (my_id<dest)
729             {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
730                       MPI_COMM_WORLD);}
731 	  else
732 	    {
733 	      type =  type - my_id + dest;
734               MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
735                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
736 	    }
737 	}
738       mask >>= 1;
739     }
740 }
741 
742 
743 
744 /***********************************comm.c*************************************
745 Function: giop_hc()
746 
747 Input :
748 Output:
749 Return:
750 Description: fan-in/out version
751 
752 note good only for num_nodes=2^k!!!
753 
754 ***********************************comm.c*************************************/
755 void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
756 {
757    int mask, edge;
758   int type, dest;
759   vfp fp;
760   MPI_Status  status;
761 
762 #ifdef SAFE
763   /* ok ... should have some data, work, and operator(s) */
764   if (!vals||!work||!oprs)
765     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
766 
767   /* non-uniform should have at least two entries */
768   if ((oprs[0] == NON_UNIFORM)&&(n<2))
769     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
770 
771   /* check to make sure comm package has been initialized */
772   if (!p_init)
773     {comm_init();}
774 #endif
775 
776   /* if there's nothing to do return */
777   if ((num_nodes<2)||(!n)||(dim<=0))
778     {return;}
779 
780   /* the error msg says it all!!! */
781   if (modfl_num_nodes)
782     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
783 
784   /* a negative number of items to send ==> fatal */
785   if (n<0)
786     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}
787 
788   /* can't do more dimensions then exist */
789   dim = PetscMin(dim,i_log2_num_nodes);
790 
791   /* advance to list of n operations for custom */
792   if ((type=oprs[0])==NON_UNIFORM)
793     {oprs++;}
794 
795   if (!(fp = (vfp) ivec_fct_addr(type))){
796     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
797     fp = (vfp) oprs;
798   }
799 
800   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
801     {
802       dest = my_id^mask;
803       if (my_id > dest)
804 	{MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
805       else
806 	{
807 	  MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
808 		   &status);
809 	  (*fp)(vals, work, n, oprs);
810 	}
811     }
812 
813   if (edge==dim)
814     {mask>>=1;}
815   else
816     {while (++edge<dim) {mask<<=1;}}
817 
818   for (edge=0; edge<dim; edge++,mask>>=1)
819     {
820       if (my_id%mask)
821 	{continue;}
822 
823       dest = my_id^mask;
824       if (my_id < dest)
825 	{MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
826       else
827 	{
828 	  MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
829 		   &status);
830 	}
831     }
832 }
833