xref: /petsc/src/ksp/pc/impls/tfs/comm.c (revision 8bc8193efbc389280f83b3d41dffa9e2d23e2ace)
1 
2 /***********************************comm.c*************************************
3 SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 11.21.97
16 ***********************************comm.c*************************************/
17 
18 /***********************************comm.c*************************************
19 File Description:
20 -----------------
21 
22 ***********************************comm.c*************************************/
23 #include "petsc.h"
24 
25 #include "const.h"
26 #include "types.h"
27 #include "error.h"
28 #include "ivec.h"
29 #include "bss_malloc.h"
30 #include "comm.h"
31 
32 
33 /* global program control variables - explicitly exported */
34 int my_id            = 0;
35 int num_nodes        = 1;
36 int floor_num_nodes  = 0;
37 int i_log2_num_nodes = 0;
38 
39 /* global program control variables */
40 static int p_init = 0;
41 static int modfl_num_nodes;
42 static int edge_not_pow_2;
43 
44 static unsigned int edge_node[INT_LEN*32];
45 
46 /***********************************comm.c*************************************
47 Function: giop()
48 
49 Input :
50 Output:
51 Return:
52 Description:
53 ***********************************comm.c*************************************/
54 void
55 comm_init (void)
56 {
57 #ifdef DEBUG
58   error_msg_warning("c_init() :: start\n");
59 #endif
60 
61   if (p_init++) return;
62 
63 #if PETSC_SIZEOF_INT != 4
64   error_msg_warning("Int != 4 Bytes!");
65 #endif
66 
67 
68   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
69   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
70 
71   if (num_nodes> (INT_MAX >> 1))
72   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}
73 
74   ivec_zero((int*)edge_node,INT_LEN*32);
75 
76   floor_num_nodes = 1;
77   i_log2_num_nodes = modfl_num_nodes = 0;
78   while (floor_num_nodes <= num_nodes)
79     {
80       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
81       floor_num_nodes <<= 1;
82       i_log2_num_nodes++;
83     }
84 
85   i_log2_num_nodes--;
86   floor_num_nodes >>= 1;
87   modfl_num_nodes = (num_nodes - floor_num_nodes);
88 
89   if ((my_id > 0) && (my_id <= modfl_num_nodes))
90     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
91   else if (my_id >= floor_num_nodes)
92     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
93     }
94   else
95     {edge_not_pow_2 = 0;}
96 
97 #ifdef DEBUG
98   error_msg_warning("c_init() done :: my_id=%d, num_nodes=%D",my_id,num_nodes);
99 #endif
100 }
101 
102 
103 
104 /***********************************comm.c*************************************
105 Function: giop()
106 
107 Input :
108 Output:
109 Return:
110 Description: fan-in/out version
111 ***********************************comm.c*************************************/
112 void
113 giop(int *vals, int *work, int n, int *oprs)
114 {
115   register int mask, edge;
116   int type, dest;
117   vfp fp;
118   MPI_Status  status;
119 
120 
121 #ifdef SAFE
122   /* ok ... should have some data, work, and operator(s) */
123   if (!vals||!work||!oprs)
124     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
125 
126   /* non-uniform should have at least two entries */
127   if ((oprs[0] == NON_UNIFORM)&&(n<2))
128     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}
129 
130   /* check to make sure comm package has been initialized */
131   if (!p_init)
132     {comm_init();}
133 #endif
134 
135   /* if there's nothing to do return */
136   if ((num_nodes<2)||(!n))
137     {
138 #ifdef DEBUG
139       error_msg_warning("giop() :: n=%D num_nodes=%d",n,num_nodes);
140 #endif
141       return;
142     }
143 
144   /* a negative number if items to send ==> fatal */
145   if (n<0)
146     {error_msg_fatal("giop() :: n=%D<0?",n);}
147 
148   /* advance to list of n operations for custom */
149   if ((type=oprs[0])==NON_UNIFORM)
150     {oprs++;}
151 
152   /* major league hack */
153   if (!(fp = (vfp) ivec_fct_addr(type))) {
154     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
155     fp = (vfp) oprs;
156   }
157 
158   /* all msgs will be of the same length */
159   /* if not a hypercube must colapse partial dim */
160   if (edge_not_pow_2)
161     {
162       if (my_id >= floor_num_nodes)
163 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
164       else
165 	{
166 	  MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
167 		   MPI_COMM_WORLD,&status);
168 	  (*fp)(vals,work,n,oprs);
169 	}
170     }
171 
172   /* implement the mesh fan in/out exchange algorithm */
173   if (my_id<floor_num_nodes)
174     {
175       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
176 	{
177 	  dest = my_id^mask;
178 	  if (my_id > dest)
179 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
180 	  else
181 	    {
182 	      MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
183 		       MPI_COMM_WORLD, &status);
184 	      (*fp)(vals, work, n, oprs);
185 	    }
186 	}
187 
188       mask=floor_num_nodes>>1;
189       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
190 	{
191 	  if (my_id%mask)
192 	    {continue;}
193 
194 	  dest = my_id^mask;
195 	  if (my_id < dest)
196 	    {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
197 	  else
198 	    {
199 	      MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
200 		       MPI_COMM_WORLD, &status);
201 	    }
202 	}
203     }
204 
205   /* if not a hypercube must expand to partial dim */
206   if (edge_not_pow_2)
207     {
208       if (my_id >= floor_num_nodes)
209 	{
210 	  MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
211 		   MPI_COMM_WORLD,&status);
212 	}
213       else
214 	{MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
215     }
216 }
217 
218 /***********************************comm.c*************************************
219 Function: grop()
220 
221 Input :
222 Output:
223 Return:
224 Description: fan-in/out version
225 ***********************************comm.c*************************************/
226 void
227 grop(REAL *vals, REAL *work, int n, int *oprs)
228 {
229   register int mask, edge;
230   int type, dest;
231   vfp fp;
232   MPI_Status  status;
233 
234 
235 #ifdef SAFE
236   /* ok ... should have some data, work, and operator(s) */
237   if (!vals||!work||!oprs)
238     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
239 
240   /* non-uniform should have at least two entries */
241   if ((oprs[0] == NON_UNIFORM)&&(n<2))
242     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}
243 
244   /* check to make sure comm package has been initialized */
245   if (!p_init)
246     {comm_init();}
247 #endif
248 
249   /* if there's nothing to do return */
250   if ((num_nodes<2)||(!n))
251     {return;}
252 
253   /* a negative number of items to send ==> fatal */
254   if (n<0)
255     {error_msg_fatal("gdop() :: n=%D<0?",n);}
256 
257   /* advance to list of n operations for custom */
258   if ((type=oprs[0])==NON_UNIFORM)
259     {oprs++;}
260 
261   if (!(fp = (vfp) rvec_fct_addr(type))) {
262     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
263     fp = (vfp) oprs;
264   }
265 
266   /* all msgs will be of the same length */
267   /* if not a hypercube must colapse partial dim */
268   if (edge_not_pow_2)
269     {
270       if (my_id >= floor_num_nodes)
271 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
272 		  MPI_COMM_WORLD);}
273       else
274 	{
275 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
276 		   MPI_COMM_WORLD,&status);
277 	  (*fp)(vals,work,n,oprs);
278 	}
279     }
280 
281   /* implement the mesh fan in/out exchange algorithm */
282   if (my_id<floor_num_nodes)
283     {
284       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
285 	{
286 	  dest = my_id^mask;
287 	  if (my_id > dest)
288 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
289 	  else
290 	    {
291 	      MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
292 		       MPI_COMM_WORLD, &status);
293 	      (*fp)(vals, work, n, oprs);
294 	    }
295 	}
296 
297       mask=floor_num_nodes>>1;
298       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
299 	{
300 	  if (my_id%mask)
301 	    {continue;}
302 
303 	  dest = my_id^mask;
304 	  if (my_id < dest)
305 	    {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
306 	  else
307 	    {
308 	      MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
309 		       MPI_COMM_WORLD, &status);
310 	    }
311 	}
312     }
313 
314   /* if not a hypercube must expand to partial dim */
315   if (edge_not_pow_2)
316     {
317       if (my_id >= floor_num_nodes)
318 	{
319 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
320 		   MPI_COMM_WORLD,&status);
321 	}
322       else
323 	{MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
324 		  MPI_COMM_WORLD);}
325     }
326 }
327 
328 
329 /***********************************comm.c*************************************
330 Function: grop()
331 
332 Input :
333 Output:
334 Return:
335 Description: fan-in/out version
336 
337 note good only for num_nodes=2^k!!!
338 
339 ***********************************comm.c*************************************/
340 void
341 grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
342 {
343   register int mask, edge;
344   int type, dest;
345   vfp fp;
346   MPI_Status  status;
347 
348 #ifdef SAFE
349   /* ok ... should have some data, work, and operator(s) */
350   if (!vals||!work||!oprs)
351     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
352 
353   /* non-uniform should have at least two entries */
354   if ((oprs[0] == NON_UNIFORM)&&(n<2))
355     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
356 
357   /* check to make sure comm package has been initialized */
358   if (!p_init)
359     {comm_init();}
360 #endif
361 
362   /* if there's nothing to do return */
363   if ((num_nodes<2)||(!n)||(dim<=0))
364     {return;}
365 
366   /* the error msg says it all!!! */
367   if (modfl_num_nodes)
368     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
369 
370   /* a negative number of items to send ==> fatal */
371   if (n<0)
372     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}
373 
374   /* can't do more dimensions then exist */
375   dim = PetscMin(dim,i_log2_num_nodes);
376 
377   /* advance to list of n operations for custom */
378   if ((type=oprs[0])==NON_UNIFORM)
379     {oprs++;}
380 
381   if (!(fp = (vfp) rvec_fct_addr(type))) {
382     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
383     fp = (vfp) oprs;
384   }
385 
386   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
387     {
388       dest = my_id^mask;
389       if (my_id > dest)
390 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
391       else
392 	{
393 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
394 		   &status);
395 	  (*fp)(vals, work, n, oprs);
396 	}
397     }
398 
399   if (edge==dim)
400     {mask>>=1;}
401   else
402     {while (++edge<dim) {mask<<=1;}}
403 
404   for (edge=0; edge<dim; edge++,mask>>=1)
405     {
406       if (my_id%mask)
407 	{continue;}
408 
409       dest = my_id^mask;
410       if (my_id < dest)
411 	{MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
412       else
413 	{
414 	  MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
415 		   &status);
416 	}
417     }
418 }
419 
420 
421 /***********************************comm.c*************************************
422 Function: gop()
423 
424 Input :
425 Output:
426 Return:
427 Description: fan-in/out version
428 ***********************************comm.c*************************************/
429 void gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
430 {
431   register int mask, edge;
432   int dest;
433   MPI_Status  status;
434   MPI_Op op;
435 
436 #ifdef SAFE
437   /* check to make sure comm package has been initialized */
438   if (!p_init)
439     {comm_init();}
440 
441   /* ok ... should have some data, work, and operator(s) */
442   if (!vals||!work||!fp)
443     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
444 #endif
445 
446   /* if there's nothing to do return */
447   if ((num_nodes<2)||(!n))
448     {return;}
449 
450   /* a negative number of items to send ==> fatal */
451   if (n<0)
452     {error_msg_fatal("gop() :: n=%D<0?",n);}
453 
454   if (comm_type==MPI)
455     {
456       MPI_Op_create(fp,TRUE,&op);
457       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
458       MPI_Op_free(&op);
459       return;
460     }
461 
462   /* if not a hypercube must colapse partial dim */
463   if (edge_not_pow_2)
464     {
465       if (my_id >= floor_num_nodes)
466 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
467 		  MPI_COMM_WORLD);}
468       else
469 	{
470 	  MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
471 		   MPI_COMM_WORLD,&status);
472 	  (*fp)(vals,work,&n,&dt);
473 	}
474     }
475 
476   /* implement the mesh fan in/out exchange algorithm */
477   if (my_id<floor_num_nodes)
478     {
479       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
480 	{
481 	  dest = my_id^mask;
482 	  if (my_id > dest)
483 	    {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
484 	  else
485 	    {
486 	      MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
487 		       MPI_COMM_WORLD, &status);
488 	      (*fp)(vals, work, &n, &dt);
489 	    }
490 	}
491 
492       mask=floor_num_nodes>>1;
493       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
494 	{
495 	  if (my_id%mask)
496 	    {continue;}
497 
498 	  dest = my_id^mask;
499 	  if (my_id < dest)
500 	    {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
501 	  else
502 	    {
503 	      MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
504 		       MPI_COMM_WORLD, &status);
505 	    }
506 	}
507     }
508 
509   /* if not a hypercube must expand to partial dim */
510   if (edge_not_pow_2)
511     {
512       if (my_id >= floor_num_nodes)
513 	{
514 	  MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
515 		   MPI_COMM_WORLD,&status);
516 	}
517       else
518 	{MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
519 		  MPI_COMM_WORLD);}
520     }
521 }
522 
523 
524 
525 
526 
527 
528 /******************************************************************************
529 Function: giop()
530 
531 Input :
532 Output:
533 Return:
534 Description:
535 
536 ii+1 entries in seg :: 0 .. ii
537 
538 ******************************************************************************/
539 void
540 ssgl_radd(register REAL *vals, register REAL *work, register int level,
541 	  register int *segs)
542 {
543   register int edge, type, dest, mask;
544   register int stage_n;
545   MPI_Status  status;
546 
547 #ifdef DEBUG
548   if (level > i_log2_num_nodes)
549     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
550 #endif
551 
552 #ifdef SAFE
553   /* check to make sure comm package has been initialized */
554   if (!p_init)
555     {comm_init();}
556 #endif
557 
558 
559   /* all msgs are *NOT* the same length */
560   /* implement the mesh fan in/out exchange algorithm */
561   for (mask=0, edge=0; edge<level; edge++, mask++)
562     {
563       stage_n = (segs[level] - segs[edge]);
564       if (stage_n && !(my_id & mask))
565 	{
566 	  dest = edge_node[edge];
567 	  type = MSGTAG3 + my_id + (num_nodes*edge);
568 	  if (my_id>dest)
569 /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
570           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
571                       MPI_COMM_WORLD);}
572 	  else
573 	    {
574 	      type =  type - my_id + dest;
575 /*	      crecv(type,work,stage_n*REAL_LEN); */
576               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
577                        MPI_COMM_WORLD,&status);
578 	      rvec_add(vals+segs[edge], work, stage_n);
579 /*            daxpy(vals+segs[edge], work, stage_n); */
580 	    }
581 	}
582       mask <<= 1;
583     }
584   mask>>=1;
585   for (edge=0; edge<level; edge++)
586     {
587       stage_n = (segs[level] - segs[level-1-edge]);
588       if (stage_n && !(my_id & mask))
589 	{
590 	  dest = edge_node[level-edge-1];
591 	  type = MSGTAG6 + my_id + (num_nodes*edge);
592 	  if (my_id<dest)
593 /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
594             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
595                       MPI_COMM_WORLD);}
596 	  else
597 	    {
598 	      type =  type - my_id + dest;
599 /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
600               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
601                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
602 	    }
603 	}
604       mask >>= 1;
605     }
606 }
607 
608 
609 
610 /***********************************comm.c*************************************
611 Function: grop_hc_vvl()
612 
613 Input :
614 Output:
615 Return:
616 Description: fan-in/out version
617 
618 note good only for num_nodes=2^k!!!
619 
620 ***********************************comm.c*************************************/
621 void
622 grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
623 {
624   register int mask, edge, n;
625   int type, dest;
626   vfp fp;
627   MPI_Status  status;
628 
629   error_msg_fatal("grop_hc_vvl() :: is not working!\n");
630 
631 #ifdef SAFE
632   /* ok ... should have some data, work, and operator(s) */
633   if (!vals||!work||!oprs||!segs)
634     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
635 
636   /* non-uniform should have at least two entries */
637 #if defined(not_used)
638   if ((oprs[0] == NON_UNIFORM)&&(n<2))
639     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
640 #endif
641 
642   /* check to make sure comm package has been initialized */
643   if (!p_init)
644     {comm_init();}
645 #endif
646 
647   /* if there's nothing to do return */
648   if ((num_nodes<2)||(dim<=0))
649     {return;}
650 
651   /* the error msg says it all!!! */
652   if (modfl_num_nodes)
653     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}
654 
655   /* can't do more dimensions then exist */
656   dim = PetscMin(dim,i_log2_num_nodes);
657 
658   /* advance to list of n operations for custom */
659   if ((type=oprs[0])==NON_UNIFORM)
660     {oprs++;}
661 
662   if (!(fp = (vfp) rvec_fct_addr(type))){
663     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
664     fp = (vfp) oprs;
665   }
666 
667   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
668     {
669       n = segs[dim]-segs[edge];
670       dest = my_id^mask;
671       if (my_id > dest)
672 	{MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
673       else
674 	{
675 	  MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
676 		   MPI_COMM_WORLD, &status);
677 	  (*fp)(vals+segs[edge], work, n, oprs);
678 	}
679     }
680 
681   if (edge==dim)
682     {mask>>=1;}
683   else
684     {while (++edge<dim) {mask<<=1;}}
685 
686   for (edge=0; edge<dim; edge++,mask>>=1)
687     {
688       if (my_id%mask)
689 	{continue;}
690 
691       n = (segs[dim]-segs[dim-1-edge]);
692 
693       dest = my_id^mask;
694       if (my_id < dest)
695 	{MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
696 		  MPI_COMM_WORLD);}
697       else
698 	{
699 	  MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
700 		   MSGTAG4+dest,MPI_COMM_WORLD, &status);
701 	}
702     }
703 }
704 
705 /******************************************************************************
706 Function: giop()
707 
708 Input :
709 Output:
710 Return:
711 Description:
712 
713 ii+1 entries in seg :: 0 .. ii
714 
715 ******************************************************************************/
716 void new_ssgl_radd(register REAL *vals, register REAL *work, register int level,register int *segs)
717 {
718   register int edge, type, dest, mask;
719   register int stage_n;
720   MPI_Status  status;
721 
722 
723 #ifdef DEBUG
724   if (level > i_log2_num_nodes)
725     {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
726 #endif
727 
728 #ifdef SAFE
729   /* check to make sure comm package has been initialized */
730   if (!p_init)
731     {comm_init();}
732 #endif
733 
734   /* all msgs are *NOT* the same length */
735   /* implement the mesh fan in/out exchange algorithm */
736   for (mask=0, edge=0; edge<level; edge++, mask++)
737     {
738       stage_n = (segs[level] - segs[edge]);
739       if (stage_n && !(my_id & mask))
740 	{
741 	  dest = edge_node[edge];
742 	  type = MSGTAG3 + my_id + (num_nodes*edge);
743 	  if (my_id>dest)
744 /*	    {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
745           {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
746                       MPI_COMM_WORLD);}
747 	  else
748 	    {
749 	      type =  type - my_id + dest;
750 /*	      crecv(type,work,stage_n*REAL_LEN); */
751               MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
752                        MPI_COMM_WORLD,&status);
753 	      rvec_add(vals+segs[edge], work, stage_n);
754 /*            daxpy(vals+segs[edge], work, stage_n); */
755 	    }
756 	}
757       mask <<= 1;
758     }
759   mask>>=1;
760   for (edge=0; edge<level; edge++)
761     {
762       stage_n = (segs[level] - segs[level-1-edge]);
763       if (stage_n && !(my_id & mask))
764 	{
765 	  dest = edge_node[level-edge-1];
766 	  type = MSGTAG6 + my_id + (num_nodes*edge);
767 	  if (my_id<dest)
768 /*	    {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
769             {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
770                       MPI_COMM_WORLD);}
771 	  else
772 	    {
773 	      type =  type - my_id + dest;
774 /* 	      crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
775               MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
776                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
777 	    }
778 	}
779       mask >>= 1;
780     }
781 }
782 
783 
784 
785 /***********************************comm.c*************************************
786 Function: giop()
787 
788 Input :
789 Output:
790 Return:
791 Description: fan-in/out version
792 
793 note good only for num_nodes=2^k!!!
794 
795 ***********************************comm.c*************************************/
796 void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
797 {
798   register int mask, edge;
799   int type, dest;
800   vfp fp;
801   MPI_Status  status;
802 
803 #ifdef SAFE
804   /* ok ... should have some data, work, and operator(s) */
805   if (!vals||!work||!oprs)
806     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}
807 
808   /* non-uniform should have at least two entries */
809   if ((oprs[0] == NON_UNIFORM)&&(n<2))
810     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}
811 
812   /* check to make sure comm package has been initialized */
813   if (!p_init)
814     {comm_init();}
815 #endif
816 
817   /* if there's nothing to do return */
818   if ((num_nodes<2)||(!n)||(dim<=0))
819     {return;}
820 
821   /* the error msg says it all!!! */
822   if (modfl_num_nodes)
823     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}
824 
825   /* a negative number of items to send ==> fatal */
826   if (n<0)
827     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}
828 
829   /* can't do more dimensions then exist */
830   dim = PetscMin(dim,i_log2_num_nodes);
831 
832   /* advance to list of n operations for custom */
833   if ((type=oprs[0])==NON_UNIFORM)
834     {oprs++;}
835 
836   if (!(fp = (vfp) ivec_fct_addr(type))){
837     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
838     fp = (vfp) oprs;
839   }
840 
841   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
842     {
843       dest = my_id^mask;
844       if (my_id > dest)
845 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
846       else
847 	{
848 	  MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
849 		   &status);
850 	  (*fp)(vals, work, n, oprs);
851 	}
852     }
853 
854   if (edge==dim)
855     {mask>>=1;}
856   else
857     {while (++edge<dim) {mask<<=1;}}
858 
859   for (edge=0; edge<dim; edge++,mask>>=1)
860     {
861       if (my_id%mask)
862 	{continue;}
863 
864       dest = my_id^mask;
865       if (my_id < dest)
866 	{MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
867       else
868 	{
869 	  MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
870 		   &status);
871 	}
872     }
873 }
874