xref: /petsc/src/ksp/pc/impls/tfs/gs.c (revision 2a6744eb01855f5aa328eb8fdf4b0d01e72ad151)
1 #define PETSCKSP_DLL
2 
3 /***********************************gs.c***************************************
4 
5 Author: Henry M. Tufo III
6 
7 e-mail: hmt@cs.brown.edu
8 
9 snail-mail:
10 Division of Applied Mathematics
11 Brown University
12 Providence, RI 02912
13 
14 Last Modification:
15 6.21.97
16 ************************************gs.c**************************************/
17 
18 /***********************************gs.c***************************************
19 File Description:
20 -----------------
21 
22 ************************************gs.c**************************************/
23 
24 #include "src/ksp/pc/impls/tfs/tfs.h"
25 
26 /* default length of number of items via tree - doubles if exceeded */
27 #define TREE_BUF_SZ 2048;
28 #define GS_VEC_SZ   1
29 
30 
31 
32 /***********************************gs.c***************************************
33 Type: struct gather_scatter_id
34 ------------------------------
35 
36 ************************************gs.c**************************************/
37 typedef struct gather_scatter_id {
38   int id;
39   int nel_min;
40   int nel_max;
41   int nel_sum;
42   int negl;
43   int gl_max;
44   int gl_min;
45   int repeats;
46   int ordered;
47   int positive;
48   PetscScalar *vals;
49 
50   /* bit mask info */
51   int *my_proc_mask;
52   int mask_sz;
53   int *ngh_buf;
54   int ngh_buf_sz;
55   int *nghs;
56   int num_nghs;
57   int max_nghs;
58   int *pw_nghs;
59   int num_pw_nghs;
60   int *tree_nghs;
61   int num_tree_nghs;
62 
63   int num_loads;
64 
65   /* repeats == true -> local info */
66   int nel;         /* number of unique elememts */
67   int *elms;       /* of size nel */
68   int nel_total;
69   int *local_elms; /* of size nel_total */
70   int *companion;  /* of size nel_total */
71 
72   /* local info */
73   int num_local_total;
74   int local_strength;
75   int num_local;
76   int *num_local_reduce;
77   int **local_reduce;
78   int num_local_gop;
79   int *num_gop_local_reduce;
80   int **gop_local_reduce;
81 
82   /* pairwise info */
83   int level;
84   int num_pairs;
85   int max_pairs;
86   int loc_node_pairs;
87   int max_node_pairs;
88   int min_node_pairs;
89   int avg_node_pairs;
90   int *pair_list;
91   int *msg_sizes;
92   int **node_list;
93   int len_pw_list;
94   int *pw_elm_list;
95   PetscScalar *pw_vals;
96 
97   MPI_Request *msg_ids_in;
98   MPI_Request *msg_ids_out;
99 
100   PetscScalar *out;
101   PetscScalar *in;
102   int msg_total;
103 
104   /* tree - crystal accumulator info */
105   int max_left_over;
106   int *pre;
107   int *in_num;
108   int *out_num;
109   int **in_list;
110   int **out_list;
111 
112   /* new tree work*/
113   int  tree_nel;
114   int *tree_elms;
115   PetscScalar *tree_buf;
116   PetscScalar *tree_work;
117 
118   int  tree_map_sz;
119   int *tree_map_in;
120   int *tree_map_out;
121 
122   /* current memory status */
123   int gl_bss_min;
124   int gl_perm_min;
125 
126   /* max segment size for gs_gop_vec() */
127   int vec_sz;
128 
129   /* hack to make paul happy */
130   MPI_Comm gs_comm;
131 
132 } gs_id;
133 
134 
135 /* to be made public */
136 
137 /* PRIVATE - and definitely not exported */
138 /*static void gs_print_template( gs_id* gs, int who);*/
139 /*static void gs_print_stemplate( gs_id* gs, int who);*/
140 
141 static gs_id *gsi_check_args(int *elms, int nel, int level);
142 static void gsi_via_bit_mask(gs_id *gs);
143 static void get_ngh_buf(gs_id *gs);
144 static void set_pairwise(gs_id *gs);
145 static gs_id * gsi_new(void);
146 static void set_tree(gs_id *gs);
147 
148 /* same for all but vector flavor */
149 static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150 /* vector flavor */
151 static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
152 
153 static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154 static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155 static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156 static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157 static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
158 
159 
160 static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161 static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162 static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163 static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164 static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
165 
166 static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167 static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168 static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
169 
170 static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171 static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172 static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173 static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174 static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
175 
176 static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177 static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178 static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179 static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180 static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
181 
182 static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183 static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184 static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185 static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186 static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
187 
188 static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189 static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190 static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191 static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192 static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
193 
194 static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195 static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196 static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197 static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198 static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
199 
200 static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201 static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202 static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203 static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204 static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
205 
206 static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207 static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208 static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209 static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
210 
211 
212 
213 /* global vars */
214 /* from comm.c module */
215 
216 /* module state inf and fortran interface */
217 static int num_gs_ids = 0;
218 
219 /* should make this dynamic ... later */
220 static int msg_buf=MAX_MSG_BUF;
221 static int vec_sz=GS_VEC_SZ;
222 static int *tree_buf=NULL;
223 static int tree_buf_sz=0;
224 static int ntree=0;
225 
226 
227 /******************************************************************************
228 Function: gs_init_()
229 
230 Input :
231 Output:
232 Return:
233 Description:
234 ******************************************************************************/
235 void gs_init_vec_sz(int size)
236 {
237   /*  vec_ch = TRUE; */
238 
239   vec_sz = size;
240 }
241 
242 /******************************************************************************
243 Function: gs_init_()
244 
245 Input :
246 Output:
247 Return:
248 Description:
249 ******************************************************************************/
250 void gs_init_msg_buf_sz(int buf_size)
251 {
252   /*  msg_ch = TRUE; */
253 
254   msg_buf = buf_size;
255 }
256 
257 /******************************************************************************
258 Function: gs_init()
259 
260 Input :
261 
262 Output:
263 
264 RETURN:
265 
266 Description:
267 ******************************************************************************/
268 gs_id *
269 gs_init( int *elms, int nel, int level)
270 {
271    gs_id *gs;
272   MPI_Group gs_group;
273   MPI_Comm  gs_comm;
274 
275   /* ensure that communication package has been initialized */
276   comm_init();
277 
278 
279   /* determines if we have enough dynamic/semi-static memory */
280   /* checks input, allocs and sets gd_id template            */
281   gs = gsi_check_args(elms,nel,level);
282 
283   /* only bit mask version up and working for the moment    */
284   /* LATER :: get int list version working for sparse pblms */
285   gsi_via_bit_mask(gs);
286 
287 
288   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290   gs->gs_comm=gs_comm;
291 
292   return(gs);
293 }
294 
295 
296 
297 /******************************************************************************
298 Function: gsi_new()
299 
300 Input :
301 Output:
302 Return:
303 Description:
304 
305 elm list must >= 0!!!
306 elm repeats allowed
307 ******************************************************************************/
308 static
309 gs_id *
310 gsi_new(void)
311 {
312   gs_id *gs;
313   gs = (gs_id *) malloc(sizeof(gs_id));
314   PetscMemzero(gs,sizeof(gs_id));
315   return(gs);
316 }
317 
318 
319 
320 /******************************************************************************
321 Function: gsi_check_args()
322 
323 Input :
324 Output:
325 Return:
326 Description:
327 
328 elm list must >= 0!!!
329 elm repeats allowed
330 local working copy of elms is sorted
331 ******************************************************************************/
332 static
333 gs_id *
334 gsi_check_args(int *in_elms, int nel, int level)
335 {
336    int i, j, k, t2;
337   int *companion, *elms, *unique, *iptr;
338   int num_local=0, *num_to_reduce, **local_reduce;
339   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341   int work[sizeof(oprs)/sizeof(oprs[0])-1];
342   gs_id *gs;
343 
344 
345 
346 #ifdef SAFE
347   if (!in_elms)
348     {error_msg_fatal("elms point to nothing!!!\n");}
349 
350   if (nel<0)
351     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
352 
353   if (nel==0)
354     {error_msg_warning("I don't have any elements!!!\n");}
355 #endif
356 
357   /* get space for gs template */
358   gs = gsi_new();
359   gs->id = ++num_gs_ids;
360 
361   /* hmt 6.4.99                                            */
362   /* caller can set global ids that don't participate to 0 */
363   /* gs_init ignores all zeros in elm list                 */
364   /* negative global ids are still invalid                 */
365   for (i=j=0;i<nel;i++)
366     {if (in_elms[i]!=0) {j++;}}
367 
368   k=nel; nel=j;
369 
370   /* copy over in_elms list and create inverse map */
371   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
372   companion = (int*) malloc(nel*sizeof(PetscInt));
373   /* ivec_c_index(companion,nel); */
374   /* ivec_copy(elms,in_elms,nel); */
375   for (i=j=0;i<k;i++)
376     {
377       if (in_elms[i]!=0)
378         {elms[j] = in_elms[i]; companion[j++] = i;}
379     }
380 
381   if (j!=nel)
382     {error_msg_fatal("nel j mismatch!\n");}
383 
384 #ifdef SAFE
385   /* pre-pass ... check to see if sorted */
386   elms[nel] = INT_MAX;
387   iptr = elms;
388   unique = elms+1;
389   j=0;
390   while (*iptr!=INT_MAX)
391     {
392       if (*iptr++>*unique++)
393         {j=1; break;}
394     }
395 
396   /* set up inverse map */
397   if (j)
398     {
399       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
400       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
401     }
402   else
403     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
404 #else
405   SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
406 #endif
407   elms[nel] = INT_MIN;
408 
409   /* first pass */
410   /* determine number of unique elements, check pd */
411   for (i=k=0;i<nel;i+=j)
412     {
413       t2 = elms[i];
414       j=++i;
415 
416       /* clump 'em for now */
417       while (elms[j]==t2) {j++;}
418 
419       /* how many together and num local */
420       if (j-=i)
421         {num_local++; k+=j;}
422     }
423 
424   /* how many unique elements? */
425   gs->repeats=k;
426   gs->nel = nel-k;
427 
428 
429   /* number of repeats? */
430   gs->num_local = num_local;
431   num_local+=2;
432   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
433   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
434 
435   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
436   gs->elms = unique;
437   gs->nel_total = nel;
438   gs->local_elms = elms;
439   gs->companion = companion;
440 
441   /* compess map as well as keep track of local ops */
442   for (num_local=i=j=0;i<gs->nel;i++)
443     {
444       k=j;
445       t2 = unique[i] = elms[j];
446       companion[i] = companion[j];
447 
448       while (elms[j]==t2) {j++;}
449 
450       if ((t2=(j-k))>1)
451         {
452           /* number together */
453           num_to_reduce[num_local] = t2++;
454           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
455 
456           /* to use binary searching don't remap until we check intersection */
457           *iptr++ = i;
458 
459           /* note that we're skipping the first one */
460           while (++k<j)
461             {*(iptr++) = companion[k];}
462           *iptr = -1;
463         }
464     }
465 
466   /* sentinel for ngh_buf */
467   unique[gs->nel]=INT_MAX;
468 
469   /* for two partition sort hack */
470   num_to_reduce[num_local] = 0;
471   local_reduce[num_local] = NULL;
472   num_to_reduce[++num_local] = 0;
473   local_reduce[num_local] = NULL;
474 
475   /* load 'em up */
476   /* note one extra to hold NON_UNIFORM flag!!! */
477   vals[2] = vals[1] = vals[0] = nel;
478   if (gs->nel>0)
479     {
480        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
481        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
482     }
483   else
484     {
485        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
486        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
487     }
488   vals[5] = level;
489   vals[6] = num_gs_ids;
490 
491   /* GLOBAL: send 'em out */
492   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
493 
494   /* must be semi-pos def - only pairwise depends on this */
495   /* LATER - remove this restriction */
496   if (vals[3]<0)
497     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
498 
499   if (vals[4]==INT_MAX)
500     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
501 
502   gs->nel_min = vals[0];
503   gs->nel_max = vals[1];
504   gs->nel_sum = vals[2];
505   gs->gl_min  = vals[3];
506   gs->gl_max  = vals[4];
507   gs->negl    = vals[4]-vals[3]+1;
508 
509   if (gs->negl<=0)
510     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
511 
512   /* LATER :: add level == -1 -> program selects level */
513   if (vals[5]<0)
514     {vals[5]=0;}
515   else if (vals[5]>num_nodes)
516     {vals[5]=num_nodes;}
517   gs->level = vals[5];
518 
519   return(gs);
520 }
521 
522 
523 /******************************************************************************
524 Function: gsi_via_bit_mask()
525 
526 Input :
527 Output:
528 Return:
529 Description:
530 
531 
532 ******************************************************************************/
533 static
534 void
535 gsi_via_bit_mask(gs_id *gs)
536 {
537    int i, nel, *elms;
538   int t1;
539   int **reduce;
540   int *map;
541 
542   /* totally local removes ... ct_bits == 0 */
543   get_ngh_buf(gs);
544 
545   if (gs->level)
546     {set_pairwise(gs);}
547 
548   if (gs->max_left_over)
549     {set_tree(gs);}
550 
551   /* intersection local and pairwise/tree? */
552   gs->num_local_total = gs->num_local;
553   gs->gop_local_reduce = gs->local_reduce;
554   gs->num_gop_local_reduce = gs->num_local_reduce;
555 
556   map = gs->companion;
557 
558   /* is there any local compression */
559   if (!gs->num_local) {
560     gs->local_strength = NONE;
561     gs->num_local_gop = 0;
562   } else {
563       /* ok find intersection */
564       map = gs->companion;
565       reduce = gs->local_reduce;
566       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
567         {
568           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
569               ||
570               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
571             {
572               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
573               t1++;
574               if (gs->num_local_reduce[i]<=0)
575                 {error_msg_fatal("nobody in list?");}
576               gs->num_local_reduce[i] *= -1;
577             }
578            **reduce=map[**reduce];
579         }
580 
581       /* intersection is empty */
582       if (!t1)
583         {
584           gs->local_strength = FULL;
585           gs->num_local_gop = 0;
586         }
587       /* intersection not empty */
588       else
589         {
590           gs->local_strength = PARTIAL;
591           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
592                    gs->num_local + 1, SORT_INT_PTR);
593 
594           gs->num_local_gop = t1;
595           gs->num_local_total =  gs->num_local;
596           gs->num_local    -= t1;
597           gs->gop_local_reduce = gs->local_reduce;
598           gs->num_gop_local_reduce = gs->num_local_reduce;
599 
600           for (i=0; i<t1; i++)
601             {
602               if (gs->num_gop_local_reduce[i]>=0)
603                 {error_msg_fatal("they aren't negative?");}
604               gs->num_gop_local_reduce[i] *= -1;
605               gs->local_reduce++;
606               gs->num_local_reduce++;
607             }
608           gs->local_reduce++;
609           gs->num_local_reduce++;
610         }
611     }
612 
613   elms = gs->pw_elm_list;
614   nel  = gs->len_pw_list;
615   for (i=0; i<nel; i++)
616     {elms[i] = map[elms[i]];}
617 
618   elms = gs->tree_map_in;
619   nel  = gs->tree_map_sz;
620   for (i=0; i<nel; i++)
621     {elms[i] = map[elms[i]];}
622 
623   /* clean up */
624   free((void*) gs->local_elms);
625   free((void*) gs->companion);
626   free((void*) gs->elms);
627   free((void*) gs->ngh_buf);
628   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
629 }
630 
631 
632 
633 /******************************************************************************
634 Function: place_in_tree()
635 
636 Input :
637 Output:
638 Return:
639 Description:
640 
641 
642 ******************************************************************************/
643 static
644 void
645 place_in_tree( int elm)
646 {
647    int *tp, n;
648 
649 
650   if (ntree==tree_buf_sz)
651     {
652       if (tree_buf_sz)
653         {
654           tp = tree_buf;
655           n = tree_buf_sz;
656           tree_buf_sz<<=1;
657           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
658           ivec_copy(tree_buf,tp,n);
659           free(tp);
660         }
661       else
662         {
663           tree_buf_sz = TREE_BUF_SZ;
664           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
665         }
666     }
667 
668   tree_buf[ntree++] = elm;
669 }
670 
671 
672 
673 /******************************************************************************
674 Function: get_ngh_buf()
675 
676 Input :
677 Output:
678 Return:
679 Description:
680 
681 
682 ******************************************************************************/
683 static
684 void
685 get_ngh_buf(gs_id *gs)
686 {
687    int i, j, npw=0, ntree_map=0;
688   int p_mask_size, ngh_buf_size, buf_size;
689   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
690   int *ngh_buf, *buf1, *buf2;
691   int offset, per_load, num_loads, or_ct, start, end;
692   int *ptr1, *ptr2, i_start, negl, nel, *elms;
693   int oper=GL_B_OR;
694   int *ptr3, *t_mask, level, ct1, ct2;
695 
696   /* to make life easier */
697   nel   = gs->nel;
698   elms  = gs->elms;
699   level = gs->level;
700 
701   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
702   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
703   set_bit_mask(p_mask,p_mask_size,my_id);
704 
705   /* allocate space for masks and info bufs */
706   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
707   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
708   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
709   t_mask = (int*) malloc(p_mask_size);
710   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
711 
712   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
713   /* had thought I could exploit rendezvous threshold */
714 
715   /* default is one pass */
716   per_load = negl  = gs->negl;
717   gs->num_loads = num_loads = 1;
718   i=p_mask_size*negl;
719 
720   /* possible overflow on buffer size */
721   /* overflow hack                    */
722   if (i<0) {i=INT_MAX;}
723 
724   buf_size = PetscMin(msg_buf,i);
725 
726   /* can we do it? */
727   if (p_mask_size>buf_size)
728     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
729 
730   /* get giop buf space ... make *only* one malloc */
731   buf1 = (int*) malloc(buf_size<<1);
732 
733   /* more than one gior exchange needed? */
734   if (buf_size!=i)
735     {
736       per_load = buf_size/p_mask_size;
737       buf_size = per_load*p_mask_size;
738       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
739     }
740 
741 
742   /* convert buf sizes from #bytes to #ints - 32 bit only! */
743 #ifdef SAFE
744   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
745 #else
746   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
747 #endif
748 
749   /* find giop work space */
750   buf2 = buf1+buf_size;
751 
752   /* hold #ints needed for processor masks */
753   gs->mask_sz=p_mask_size;
754 
755   /* init buffers */
756   ivec_zero(sh_proc_mask,p_mask_size);
757   ivec_zero(pw_sh_proc_mask,p_mask_size);
758   ivec_zero(ngh_buf,ngh_buf_size);
759 
760   /* HACK reset tree info */
761   tree_buf=NULL;
762   tree_buf_sz=ntree=0;
763 
764   /* queue the tree elements for now */
765   /* elms_q = new_queue(); */
766 
767   /* can also queue tree info for pruned or forest implememtation */
768   /*  mask_q = new_queue(); */
769 
770   /* ok do it */
771   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
772     {
773       /* identity for bitwise or is 000...000 */
774       ivec_zero(buf1,buf_size);
775 
776       /* load msg buffer */
777       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
778         {
779           offset = (offset-start)*p_mask_size;
780           ivec_copy(buf1+offset,p_mask,p_mask_size);
781         }
782 
783       /* GLOBAL: pass buffer */
784       giop(buf1,buf2,buf_size,&oper);
785 
786 
787       /* unload buffer into ngh_buf */
788       ptr2=(elms+i_start);
789       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
790         {
791           /* I own it ... may have to pairwise it */
792           if (j==*ptr2)
793             {
794               /* do i share it w/anyone? */
795 #ifdef SAFE
796               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
797 #else
798               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
799 #endif
800               /* guess not */
801               if (ct1<2)
802                 {ptr2++; ptr1+=p_mask_size; continue;}
803 
804               /* i do ... so keep info and turn off my bit */
805               ivec_copy(ptr1,ptr3,p_mask_size);
806               ivec_xor(ptr1,p_mask,p_mask_size);
807               ivec_or(sh_proc_mask,ptr1,p_mask_size);
808 
809               /* is it to be done pairwise? */
810               if (--ct1<=level)
811                 {
812                   npw++;
813 
814                   /* turn on high bit to indicate pw need to process */
815                   *ptr2++ |= TOP_BIT;
816                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
817                   ptr1+=p_mask_size;
818                   continue;
819                 }
820 
821               /* get set for next and note that I have a tree contribution */
822               /* could save exact elm index for tree here -> save a search */
823               ptr2++; ptr1+=p_mask_size; ntree_map++;
824             }
825           /* i don't but still might be involved in tree */
826           else
827             {
828 
829               /* shared by how many? */
830 #ifdef SAFE
831               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
832 #else
833               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
834 #endif
835 
836               /* none! */
837               if (ct1<2)
838                 {continue;}
839 
840               /* is it going to be done pairwise? but not by me of course!*/
841               if (--ct1<=level)
842                 {continue;}
843             }
844           /* LATER we're going to have to process it NOW */
845           /* nope ... tree it */
846           place_in_tree(j);
847         }
848     }
849 
850   free((void*)t_mask);
851   free((void*)buf1);
852 
853   gs->len_pw_list=npw;
854   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
855 
856   /* expand from bit mask list to int list and save ngh list */
857   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
858   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
859 
860   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
861 
862   oper = GL_MAX;
863   ct1 = gs->num_nghs;
864   giop(&ct1,&ct2,1,&oper);
865   gs->max_nghs = ct1;
866 
867   gs->tree_map_sz  = ntree_map;
868   gs->max_left_over=ntree;
869 
870   free((void*)p_mask);
871   free((void*)sh_proc_mask);
872 
873 }
874 
875 
876 
877 
878 
879 /******************************************************************************
880 Function: pairwise_init()
881 
882 Input :
883 Output:
884 Return:
885 Description:
886 
887 if an element is shared by fewer that level# of nodes do pairwise exch
888 ******************************************************************************/
889 static
890 void
891 set_pairwise(gs_id *gs)
892 {
893    int i, j;
894   int p_mask_size;
895   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
896   int *ngh_buf, *buf2;
897   int offset;
898   int *msg_list, *msg_size, **msg_nodes, nprs;
899   int *pairwise_elm_list, len_pair_list=0;
900   int *iptr, t1, i_start, nel, *elms;
901   int ct;
902 
903 
904 
905   /* to make life easier */
906   nel  = gs->nel;
907   elms = gs->elms;
908   ngh_buf = gs->ngh_buf;
909   sh_proc_mask  = gs->pw_nghs;
910 
911   /* need a few temp masks */
912   p_mask_size   = len_bit_mask(num_nodes);
913   p_mask        = (int*) malloc(p_mask_size);
914   tmp_proc_mask = (int*) malloc(p_mask_size);
915 
916   /* set mask to my my_id's bit mask */
917   set_bit_mask(p_mask,p_mask_size,my_id);
918 
919 #ifdef SAFE
920   p_mask_size /= sizeof(PetscInt);
921 #else
922   p_mask_size >>= 2;
923 #endif
924 
925   len_pair_list=gs->len_pw_list;
926   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
927 
928   /* how many processors (nghs) do we have to exchange with? */
929   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
930 
931 
932   /* allocate space for gs_gop() info */
933   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
934   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
935   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
936 
937   /* init msg_size list */
938   ivec_zero(msg_size,nprs);
939 
940   /* expand from bit mask list to int list */
941   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
942 
943   /* keep list of elements being handled pairwise */
944   for (i=j=0;i<nel;i++)
945     {
946       if (elms[i] & TOP_BIT)
947         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
948     }
949   pairwise_elm_list[j] = -1;
950 
951   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
952   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
953   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
954   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
955   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
956 
957   /* find who goes to each processor */
958   for (i_start=i=0;i<nprs;i++)
959     {
960       /* processor i's mask */
961       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
962 
963       /* det # going to processor i */
964       for (ct=j=0;j<len_pair_list;j++)
965         {
966           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
967           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
968           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
969             {ct++;}
970         }
971       msg_size[i] = ct;
972       i_start = PetscMax(i_start,ct);
973 
974       /*space to hold nodes in message to first neighbor */
975       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
976 
977       for (j=0;j<len_pair_list;j++)
978         {
979           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
980           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
981           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
982             {*iptr++ = j;}
983         }
984       *iptr = -1;
985     }
986   msg_nodes[nprs] = NULL;
987 
988   j=gs->loc_node_pairs=i_start;
989   t1 = GL_MAX;
990   giop(&i_start,&offset,1,&t1);
991   gs->max_node_pairs = i_start;
992 
993   i_start=j;
994   t1 = GL_MIN;
995   giop(&i_start,&offset,1,&t1);
996   gs->min_node_pairs = i_start;
997 
998   i_start=j;
999   t1 = GL_ADD;
1000   giop(&i_start,&offset,1,&t1);
1001   gs->avg_node_pairs = i_start/num_nodes + 1;
1002 
1003   i_start=nprs;
1004   t1 = GL_MAX;
1005   giop(&i_start,&offset,1,&t1);
1006   gs->max_pairs = i_start;
1007 
1008 
1009   /* remap pairwise in tail of gsi_via_bit_mask() */
1010   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1011   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1012   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1013 
1014   /* reset malloc pool */
1015   free((void*)p_mask);
1016   free((void*)tmp_proc_mask);
1017 
1018 }
1019 
1020 
1021 
1022 /******************************************************************************
1023 Function: set_tree()
1024 
1025 Input :
1026 Output:
1027 Return:
1028 Description:
1029 
1030 to do pruned tree just save ngh buf copy for each one and decode here!
1031 ******************************************************************************/
1032 static
1033 void
1034 set_tree(gs_id *gs)
1035 {
1036    int i, j, n, nel;
1037    int *iptr_in, *iptr_out, *tree_elms, *elms;
1038 
1039 
1040   /* local work ptrs */
1041   elms = gs->elms;
1042   nel     = gs->nel;
1043 
1044   /* how many via tree */
1045   gs->tree_nel  = n = ntree;
1046   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1047   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1048   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1049   j=gs->tree_map_sz;
1050   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1051   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1052 
1053   /* search the longer of the two lists */
1054   /* note ... could save this info in get_ngh_buf and save searches */
1055   if (n<=nel)
1056     {
1057       /* bijective fct w/remap - search elm list */
1058       for (i=0; i<n; i++)
1059         {
1060           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1061             {*iptr_in++ = j; *iptr_out++ = i;}
1062         }
1063     }
1064   else
1065     {
1066       for (i=0; i<nel; i++)
1067         {
1068           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1069             {*iptr_in++ = i; *iptr_out++ = j;}
1070         }
1071     }
1072 
1073   /* sentinel */
1074   *iptr_in = *iptr_out = -1;
1075 
1076 }
1077 
1078 
1079 /******************************************************************************
1080 Function: gather_scatter
1081 
1082 Input :
1083 Output:
1084 Return:
1085 Description:
1086 ******************************************************************************/
1087 static
1088 void
1089 gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1090 {
1091    int *num, *map, **reduce;
1092    PetscScalar tmp;
1093 
1094 
1095   num    = gs->num_gop_local_reduce;
1096   reduce = gs->gop_local_reduce;
1097   while ((map = *reduce++))
1098     {
1099       /* wall */
1100       if (*num == 2)
1101         {
1102           num ++;
1103           vals[map[1]] = vals[map[0]];
1104         }
1105       /* corner shared by three elements */
1106       else if (*num == 3)
1107         {
1108           num ++;
1109           vals[map[2]] = vals[map[1]] = vals[map[0]];
1110         }
1111       /* corner shared by four elements */
1112       else if (*num == 4)
1113         {
1114           num ++;
1115           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1116         }
1117       /* general case ... odd geoms ... 3D*/
1118       else
1119         {
1120           num++;
1121           tmp = *(vals + *map++);
1122           while (*map >= 0)
1123             {*(vals + *map++) = tmp;}
1124         }
1125     }
1126 }
1127 
1128 
1129 
1130 /******************************************************************************
1131 Function: gather_scatter
1132 
1133 Input :
1134 Output:
1135 Return:
1136 Description:
1137 ******************************************************************************/
1138 void
1139 gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1140 {
1141   /* local only operations!!! */
1142   if (gs->num_local)
1143     {gs_gop_local_binary(gs,vals,fct);}
1144 
1145   /* if intersection tree/pairwise and local isn't empty */
1146   if (gs->num_local_gop)
1147     {
1148       gs_gop_local_in_binary(gs,vals,fct);
1149 
1150       /* pairwise */
1151       if (gs->num_pairs)
1152         {gs_gop_pairwise_binary(gs,vals,fct);}
1153 
1154       /* tree */
1155       else if (gs->max_left_over)
1156         {gs_gop_tree_binary(gs,vals,fct);}
1157 
1158       gs_gop_local_out(gs,vals);
1159     }
1160   /* if intersection tree/pairwise and local is empty */
1161   else
1162     {
1163       /* pairwise */
1164       if (gs->num_pairs)
1165         {gs_gop_pairwise_binary(gs,vals,fct);}
1166 
1167       /* tree */
1168       else if (gs->max_left_over)
1169         {gs_gop_tree_binary(gs,vals,fct);}
1170     }
1171 }
1172 
1173 
1174 
1175 /******************************************************************************
1176 Function: gather_scatter
1177 
1178 Input :
1179 Output:
1180 Return:
1181 Description:
1182 ******************************************************************************/
1183 static
1184 void
1185 gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1186 {
1187    int *num, *map, **reduce;
1188   PetscScalar tmp;
1189 
1190 
1191   num    = gs->num_local_reduce;
1192   reduce = gs->local_reduce;
1193   while ((map = *reduce))
1194     {
1195       num ++;
1196       (*fct)(&tmp,NULL,1);
1197       /* tmp = 0.0; */
1198       while (*map >= 0)
1199         {(*fct)(&tmp,(vals + *map),1); map++;}
1200         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1201 
1202       map = *reduce++;
1203       while (*map >= 0)
1204         {*(vals + *map++) = tmp;}
1205     }
1206 }
1207 
1208 
1209 
1210 /******************************************************************************
1211 Function: gather_scatter
1212 
1213 Input :
1214 Output:
1215 Return:
1216 Description:
1217 ******************************************************************************/
1218 static
1219 void
1220 gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1221 {
1222    int *num, *map, **reduce;
1223    PetscScalar *base;
1224 
1225 
1226   num    = gs->num_gop_local_reduce;
1227 
1228   reduce = gs->gop_local_reduce;
1229   while ((map = *reduce++))
1230     {
1231       num++;
1232       base = vals + *map++;
1233       while (*map >= 0)
1234         {(*fct)(base,(vals + *map),1); map++;}
1235     }
1236 }
1237 
1238 
1239 
1240 /******************************************************************************
1241 Function: gather_scatter
1242 
1243 VERSION 3 ::
1244 
1245 Input :
1246 Output:
1247 Return:
1248 Description:
1249 ******************************************************************************/
1250 static
1251 void
1252 gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1253                         rbfp fct)
1254 {
1255    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1256    int *iptr, *msg_list, *msg_size, **msg_nodes;
1257    int *pw, *list, *size, **nodes;
1258   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1259   MPI_Status status;
1260 
1261 
1262   /* strip and load s */
1263   msg_list =list         = gs->pair_list;
1264   msg_size =size         = gs->msg_sizes;
1265   msg_nodes=nodes        = gs->node_list;
1266   iptr=pw                = gs->pw_elm_list;
1267   dptr1=dptr3            = gs->pw_vals;
1268   msg_ids_in  = ids_in   = gs->msg_ids_in;
1269   msg_ids_out = ids_out  = gs->msg_ids_out;
1270   dptr2                  = gs->out;
1271   in1=in2                = gs->in;
1272 
1273   /* post the receives */
1274   /*  msg_nodes=nodes; */
1275   do
1276     {
1277       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1278          second one *list and do list++ afterwards */
1279       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1280                 gs->gs_comm, msg_ids_in++);
1281       in1 += *size++;
1282     }
1283   while (*++msg_nodes);
1284   msg_nodes=nodes;
1285 
1286   /* load gs values into in out gs buffers */
1287   while (*iptr >= 0)
1288     {*dptr3++ = *(in_vals + *iptr++);}
1289 
1290   /* load out buffers and post the sends */
1291   while ((iptr = *msg_nodes++))
1292     {
1293       dptr3 = dptr2;
1294       while (*iptr >= 0)
1295         {*dptr2++ = *(dptr1 + *iptr++);}
1296       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1297       /* is msg_ids_out++ correct? */
1298       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1299                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1300     }
1301 
1302   if (gs->max_left_over)
1303     {gs_gop_tree_binary(gs,in_vals,fct);}
1304 
1305   /* process the received data */
1306   msg_nodes=nodes;
1307   while ((iptr = *nodes++))
1308     {
1309       /* Should I check the return value of MPI_Wait() or status? */
1310       /* Can this loop be replaced by a call to MPI_Waitall()? */
1311       MPI_Wait(ids_in++, &status);
1312       while (*iptr >= 0)
1313         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1314       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1315     }
1316 
1317   /* replace vals */
1318   while (*pw >= 0)
1319     {*(in_vals + *pw++) = *dptr1++;}
1320 
1321   /* clear isend message handles */
1322   /* This changed for clarity though it could be the same */
1323   while (*msg_nodes++)
1324     /* Should I check the return value of MPI_Wait() or status? */
1325     /* Can this loop be replaced by a call to MPI_Waitall()? */
1326     {MPI_Wait(ids_out++, &status);}
1327 }
1328 
1329 
1330 
1331 /******************************************************************************
1332 Function: gather_scatter
1333 
1334 Input :
1335 Output:
1336 Return:
1337 Description:
1338 ******************************************************************************/
1339 static
1340 void
1341 gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1342 {
1343   int size;
1344   int *in, *out;
1345   PetscScalar *buf, *work;
1346 
1347   in   = gs->tree_map_in;
1348   out  = gs->tree_map_out;
1349   buf  = gs->tree_buf;
1350   work = gs->tree_work;
1351   size = gs->tree_nel;
1352 
1353   /* load vals vector w/identity */
1354   (*fct)(buf,NULL,size);
1355 
1356   /* load my contribution into val vector */
1357   while (*in >= 0)
1358     {(*fct)((buf + *out++),(vals + *in++),-1);}
1359 
1360   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1361 
1362   in   = gs->tree_map_in;
1363   out  = gs->tree_map_out;
1364   while (*in >= 0)
1365     {(*fct)((vals + *in++),(buf + *out++),-1);}
1366 
1367 }
1368 
1369 
1370 
1371 
1372 /******************************************************************************
1373 Function: gather_scatter
1374 
1375 Input :
1376 Output:
1377 Return:
1378 Description:
1379 ******************************************************************************/
1380 void
1381 gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1382 {
1383   switch (*op) {
1384   case '+':
1385     gs_gop_plus(gs,vals);
1386     break;
1387   case '*':
1388     gs_gop_times(gs,vals);
1389     break;
1390   case 'a':
1391     gs_gop_min_abs(gs,vals);
1392     break;
1393   case 'A':
1394     gs_gop_max_abs(gs,vals);
1395     break;
1396   case 'e':
1397     gs_gop_exists(gs,vals);
1398     break;
1399   case 'm':
1400     gs_gop_min(gs,vals);
1401     break;
1402   case 'M':
1403     gs_gop_max(gs,vals); break;
1404     /*
1405     if (*(op+1)=='\0')
1406       {gs_gop_max(gs,vals); break;}
1407     else if (*(op+1)=='X')
1408       {gs_gop_max_abs(gs,vals); break;}
1409     else if (*(op+1)=='N')
1410       {gs_gop_min_abs(gs,vals); break;}
1411     */
1412   default:
1413     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1414     error_msg_warning("gs_gop() :: default :: plus");
1415     gs_gop_plus(gs,vals);
1416     break;
1417   }
1418 }
1419 
1420 
1421 /******************************************************************************
1422 Function: gather_scatter
1423 
1424 Input :
1425 Output:
1426 Return:
1427 Description:
1428 ******************************************************************************/
1429 static void
1430 gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1431 {
1432   /* local only operations!!! */
1433   if (gs->num_local)
1434     {gs_gop_local_exists(gs,vals);}
1435 
1436   /* if intersection tree/pairwise and local isn't empty */
1437   if (gs->num_local_gop)
1438     {
1439       gs_gop_local_in_exists(gs,vals);
1440 
1441       /* pairwise */
1442       if (gs->num_pairs)
1443         {gs_gop_pairwise_exists(gs,vals);}
1444 
1445       /* tree */
1446       else if (gs->max_left_over)
1447         {gs_gop_tree_exists(gs,vals);}
1448 
1449       gs_gop_local_out(gs,vals);
1450     }
1451   /* if intersection tree/pairwise and local is empty */
1452   else
1453     {
1454       /* pairwise */
1455       if (gs->num_pairs)
1456         {gs_gop_pairwise_exists(gs,vals);}
1457 
1458       /* tree */
1459       else if (gs->max_left_over)
1460         {gs_gop_tree_exists(gs,vals);}
1461     }
1462 }
1463 
1464 
1465 
1466 /******************************************************************************
1467 Function: gather_scatter
1468 
1469 Input :
1470 Output:
1471 Return:
1472 Description:
1473 ******************************************************************************/
1474 static
1475 void
1476 gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1477 {
1478    int *num, *map, **reduce;
1479    PetscScalar tmp;
1480 
1481 
1482   num    = gs->num_local_reduce;
1483   reduce = gs->local_reduce;
1484   while ((map = *reduce))
1485     {
1486       num ++;
1487       tmp = 0.0;
1488       while (*map >= 0)
1489         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1490 
1491       map = *reduce++;
1492       while (*map >= 0)
1493         {*(vals + *map++) = tmp;}
1494     }
1495 }
1496 
1497 
1498 
1499 /******************************************************************************
1500 Function: gather_scatter
1501 
1502 Input :
1503 Output:
1504 Return:
1505 Description:
1506 ******************************************************************************/
1507 static
1508 void
1509 gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1510 {
1511    int *num, *map, **reduce;
1512    PetscScalar *base;
1513 
1514 
1515   num    = gs->num_gop_local_reduce;
1516   reduce = gs->gop_local_reduce;
1517   while ((map = *reduce++))
1518     {
1519       num++;
1520       base = vals + *map++;
1521       while (*map >= 0)
1522         {*base = EXISTS(*base,*(vals + *map)); map++;}
1523     }
1524 }
1525 
1526 
1527 
1528 /******************************************************************************
1529 Function: gather_scatter
1530 
1531 VERSION 3 ::
1532 
1533 Input :
1534 Output:
1535 Return:
1536 Description:
1537 ******************************************************************************/
1538 static
1539 void
1540 gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1541 {
1542    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1543    int *iptr, *msg_list, *msg_size, **msg_nodes;
1544    int *pw, *list, *size, **nodes;
1545   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1546   MPI_Status status;
1547 
1548 
1549   /* strip and load s */
1550   msg_list =list         = gs->pair_list;
1551   msg_size =size         = gs->msg_sizes;
1552   msg_nodes=nodes        = gs->node_list;
1553   iptr=pw                = gs->pw_elm_list;
1554   dptr1=dptr3            = gs->pw_vals;
1555   msg_ids_in  = ids_in   = gs->msg_ids_in;
1556   msg_ids_out = ids_out  = gs->msg_ids_out;
1557   dptr2                  = gs->out;
1558   in1=in2                = gs->in;
1559 
1560   /* post the receives */
1561   /*  msg_nodes=nodes; */
1562   do
1563     {
1564       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1565          second one *list and do list++ afterwards */
1566       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1567                 gs->gs_comm, msg_ids_in++);
1568       in1 += *size++;
1569     }
1570   while (*++msg_nodes);
1571   msg_nodes=nodes;
1572 
1573   /* load gs values into in out gs buffers */
1574   while (*iptr >= 0)
1575     {*dptr3++ = *(in_vals + *iptr++);}
1576 
1577   /* load out buffers and post the sends */
1578   while ((iptr = *msg_nodes++))
1579     {
1580       dptr3 = dptr2;
1581       while (*iptr >= 0)
1582         {*dptr2++ = *(dptr1 + *iptr++);}
1583       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1584       /* is msg_ids_out++ correct? */
1585       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1586                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1587     }
1588 
1589   if (gs->max_left_over)
1590     {gs_gop_tree_exists(gs,in_vals);}
1591 
1592   /* process the received data */
1593   msg_nodes=nodes;
1594   while ((iptr = *nodes++))
1595     {
1596       /* Should I check the return value of MPI_Wait() or status? */
1597       /* Can this loop be replaced by a call to MPI_Waitall()? */
1598       MPI_Wait(ids_in++, &status);
1599       while (*iptr >= 0)
1600         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1601     }
1602 
1603   /* replace vals */
1604   while (*pw >= 0)
1605     {*(in_vals + *pw++) = *dptr1++;}
1606 
1607   /* clear isend message handles */
1608   /* This changed for clarity though it could be the same */
1609   while (*msg_nodes++)
1610     /* Should I check the return value of MPI_Wait() or status? */
1611     /* Can this loop be replaced by a call to MPI_Waitall()? */
1612     {MPI_Wait(ids_out++, &status);}
1613 }
1614 
1615 
1616 
1617 /******************************************************************************
1618 Function: gather_scatter
1619 
1620 Input :
1621 Output:
1622 Return:
1623 Description:
1624 ******************************************************************************/
1625 static
1626 void
1627 gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1628 {
1629   int size;
1630   int *in, *out;
1631   PetscScalar *buf, *work;
1632   int op[] = {GL_EXISTS,0};
1633 
1634 
1635   in   = gs->tree_map_in;
1636   out  = gs->tree_map_out;
1637   buf  = gs->tree_buf;
1638   work = gs->tree_work;
1639   size = gs->tree_nel;
1640 
1641   rvec_zero(buf,size);
1642 
1643   while (*in >= 0)
1644     {
1645       /*
1646       printf("%d :: out=%d\n",my_id,*out);
1647       printf("%d :: in=%d\n",my_id,*in);
1648       */
1649       *(buf + *out++) = *(vals + *in++);
1650     }
1651 
1652   grop(buf,work,size,op);
1653 
1654   in   = gs->tree_map_in;
1655   out  = gs->tree_map_out;
1656 
1657   while (*in >= 0)
1658     {*(vals + *in++) = *(buf + *out++);}
1659 
1660 }
1661 
1662 
1663 
1664 /******************************************************************************
1665 Function: gather_scatter
1666 
1667 Input :
1668 Output:
1669 Return:
1670 Description:
1671 ******************************************************************************/
1672 static void
1673 gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1674 {
1675   /* local only operations!!! */
1676   if (gs->num_local)
1677     {gs_gop_local_max_abs(gs,vals);}
1678 
1679   /* if intersection tree/pairwise and local isn't empty */
1680   if (gs->num_local_gop)
1681     {
1682       gs_gop_local_in_max_abs(gs,vals);
1683 
1684       /* pairwise */
1685       if (gs->num_pairs)
1686         {gs_gop_pairwise_max_abs(gs,vals);}
1687 
1688       /* tree */
1689       else if (gs->max_left_over)
1690         {gs_gop_tree_max_abs(gs,vals);}
1691 
1692       gs_gop_local_out(gs,vals);
1693     }
1694   /* if intersection tree/pairwise and local is empty */
1695   else
1696     {
1697       /* pairwise */
1698       if (gs->num_pairs)
1699         {gs_gop_pairwise_max_abs(gs,vals);}
1700 
1701       /* tree */
1702       else if (gs->max_left_over)
1703         {gs_gop_tree_max_abs(gs,vals);}
1704     }
1705 }
1706 
1707 
1708 
1709 /******************************************************************************
1710 Function: gather_scatter
1711 
1712 Input :
1713 Output:
1714 Return:
1715 Description:
1716 ******************************************************************************/
1717 static
1718 void
1719 gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1720 {
1721    int *num, *map, **reduce;
1722    PetscScalar tmp;
1723 
1724 
1725   num    = gs->num_local_reduce;
1726   reduce = gs->local_reduce;
1727   while ((map = *reduce))
1728     {
1729       num ++;
1730       tmp = 0.0;
1731       while (*map >= 0)
1732         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1733 
1734       map = *reduce++;
1735       while (*map >= 0)
1736         {*(vals + *map++) = tmp;}
1737     }
1738 }
1739 
1740 
1741 
1742 /******************************************************************************
1743 Function: gather_scatter
1744 
1745 Input :
1746 Output:
1747 Return:
1748 Description:
1749 ******************************************************************************/
1750 static
1751 void
1752 gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1753 {
1754    int *num, *map, **reduce;
1755    PetscScalar *base;
1756 
1757 
1758   num    = gs->num_gop_local_reduce;
1759   reduce = gs->gop_local_reduce;
1760   while ((map = *reduce++))
1761     {
1762       num++;
1763       base = vals + *map++;
1764       while (*map >= 0)
1765         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1766     }
1767 }
1768 
1769 
1770 
1771 /******************************************************************************
1772 Function: gather_scatter
1773 
1774 VERSION 3 ::
1775 
1776 Input :
1777 Output:
1778 Return:
1779 Description:
1780 ******************************************************************************/
1781 static
1782 void
1783 gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1784 {
1785    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1786    int *iptr, *msg_list, *msg_size, **msg_nodes;
1787    int *pw, *list, *size, **nodes;
1788   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1789   MPI_Status status;
1790 
1791 
1792   /* strip and load s */
1793   msg_list =list         = gs->pair_list;
1794   msg_size =size         = gs->msg_sizes;
1795   msg_nodes=nodes        = gs->node_list;
1796   iptr=pw                = gs->pw_elm_list;
1797   dptr1=dptr3            = gs->pw_vals;
1798   msg_ids_in  = ids_in   = gs->msg_ids_in;
1799   msg_ids_out = ids_out  = gs->msg_ids_out;
1800   dptr2                  = gs->out;
1801   in1=in2                = gs->in;
1802 
1803   /* post the receives */
1804   /*  msg_nodes=nodes; */
1805   do
1806     {
1807       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1808          second one *list and do list++ afterwards */
1809       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1810                 gs->gs_comm, msg_ids_in++);
1811       in1 += *size++;
1812     }
1813   while (*++msg_nodes);
1814   msg_nodes=nodes;
1815 
1816   /* load gs values into in out gs buffers */
1817   while (*iptr >= 0)
1818     {*dptr3++ = *(in_vals + *iptr++);}
1819 
1820   /* load out buffers and post the sends */
1821   while ((iptr = *msg_nodes++))
1822     {
1823       dptr3 = dptr2;
1824       while (*iptr >= 0)
1825         {*dptr2++ = *(dptr1 + *iptr++);}
1826       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1827       /* is msg_ids_out++ correct? */
1828       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1829                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1830     }
1831 
1832   if (gs->max_left_over)
1833     {gs_gop_tree_max_abs(gs,in_vals);}
1834 
1835   /* process the received data */
1836   msg_nodes=nodes;
1837   while ((iptr = *nodes++))
1838     {
1839       /* Should I check the return value of MPI_Wait() or status? */
1840       /* Can this loop be replaced by a call to MPI_Waitall()? */
1841       MPI_Wait(ids_in++, &status);
1842       while (*iptr >= 0)
1843         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1844     }
1845 
1846   /* replace vals */
1847   while (*pw >= 0)
1848     {*(in_vals + *pw++) = *dptr1++;}
1849 
1850   /* clear isend message handles */
1851   /* This changed for clarity though it could be the same */
1852   while (*msg_nodes++)
1853     /* Should I check the return value of MPI_Wait() or status? */
1854     /* Can this loop be replaced by a call to MPI_Waitall()? */
1855     {MPI_Wait(ids_out++, &status);}
1856 }
1857 
1858 
1859 
1860 /******************************************************************************
1861 Function: gather_scatter
1862 
1863 Input :
1864 Output:
1865 Return:
1866 Description:
1867 ******************************************************************************/
1868 static
1869 void
1870 gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1871 {
1872   int size;
1873   int *in, *out;
1874   PetscScalar *buf, *work;
1875   int op[] = {GL_MAX_ABS,0};
1876 
1877 
1878   in   = gs->tree_map_in;
1879   out  = gs->tree_map_out;
1880   buf  = gs->tree_buf;
1881   work = gs->tree_work;
1882   size = gs->tree_nel;
1883 
1884   rvec_zero(buf,size);
1885 
1886   while (*in >= 0)
1887     {
1888       /*
1889       printf("%d :: out=%d\n",my_id,*out);
1890       printf("%d :: in=%d\n",my_id,*in);
1891       */
1892       *(buf + *out++) = *(vals + *in++);
1893     }
1894 
1895   grop(buf,work,size,op);
1896 
1897   in   = gs->tree_map_in;
1898   out  = gs->tree_map_out;
1899 
1900   while (*in >= 0)
1901     {*(vals + *in++) = *(buf + *out++);}
1902 
1903 }
1904 
1905 
1906 
1907 /******************************************************************************
1908 Function: gather_scatter
1909 
1910 Input :
1911 Output:
1912 Return:
1913 Description:
1914 ******************************************************************************/
1915 static void
1916 gs_gop_max( gs_id *gs,  PetscScalar *vals)
1917 {
1918 
1919   /* local only operations!!! */
1920   if (gs->num_local)
1921     {gs_gop_local_max(gs,vals);}
1922 
1923   /* if intersection tree/pairwise and local isn't empty */
1924   if (gs->num_local_gop)
1925     {
1926       gs_gop_local_in_max(gs,vals);
1927 
1928       /* pairwise */
1929       if (gs->num_pairs)
1930         {gs_gop_pairwise_max(gs,vals);}
1931 
1932       /* tree */
1933       else if (gs->max_left_over)
1934         {gs_gop_tree_max(gs,vals);}
1935 
1936       gs_gop_local_out(gs,vals);
1937     }
1938   /* if intersection tree/pairwise and local is empty */
1939   else
1940     {
1941       /* pairwise */
1942       if (gs->num_pairs)
1943         {gs_gop_pairwise_max(gs,vals);}
1944 
1945       /* tree */
1946       else if (gs->max_left_over)
1947         {gs_gop_tree_max(gs,vals);}
1948     }
1949 }
1950 
1951 
1952 
1953 /******************************************************************************
1954 Function: gather_scatter
1955 
1956 Input :
1957 Output:
1958 Return:
1959 Description:
1960 ******************************************************************************/
1961 static
1962 void
1963 gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1964 {
1965    int *num, *map, **reduce;
1966    PetscScalar tmp;
1967 
1968 
1969   num    = gs->num_local_reduce;
1970   reduce = gs->local_reduce;
1971   while ((map = *reduce))
1972     {
1973       num ++;
1974       tmp = -REAL_MAX;
1975       while (*map >= 0)
1976         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1977 
1978       map = *reduce++;
1979       while (*map >= 0)
1980         {*(vals + *map++) = tmp;}
1981     }
1982 }
1983 
1984 
1985 
1986 /******************************************************************************
1987 Function: gather_scatter
1988 
1989 Input :
1990 Output:
1991 Return:
1992 Description:
1993 ******************************************************************************/
1994 static
1995 void
1996 gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1997 {
1998    int *num, *map, **reduce;
1999    PetscScalar *base;
2000 
2001 
2002   num    = gs->num_gop_local_reduce;
2003   reduce = gs->gop_local_reduce;
2004   while ((map = *reduce++))
2005     {
2006       num++;
2007       base = vals + *map++;
2008       while (*map >= 0)
2009         {*base = PetscMax(*base,*(vals + *map)); map++;}
2010     }
2011 }
2012 
2013 
2014 
2015 /******************************************************************************
2016 Function: gather_scatter
2017 
2018 VERSION 3 ::
2019 
2020 Input :
2021 Output:
2022 Return:
2023 Description:
2024 ******************************************************************************/
2025 static
2026 void
2027 gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2028 {
2029    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2030    int *iptr, *msg_list, *msg_size, **msg_nodes;
2031    int *pw, *list, *size, **nodes;
2032   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2033   MPI_Status status;
2034 
2035 
2036   /* strip and load s */
2037   msg_list =list         = gs->pair_list;
2038   msg_size =size         = gs->msg_sizes;
2039   msg_nodes=nodes        = gs->node_list;
2040   iptr=pw                = gs->pw_elm_list;
2041   dptr1=dptr3            = gs->pw_vals;
2042   msg_ids_in  = ids_in   = gs->msg_ids_in;
2043   msg_ids_out = ids_out  = gs->msg_ids_out;
2044   dptr2                  = gs->out;
2045   in1=in2                = gs->in;
2046 
2047   /* post the receives */
2048   /*  msg_nodes=nodes; */
2049   do
2050     {
2051       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2052          second one *list and do list++ afterwards */
2053       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2054                 gs->gs_comm, msg_ids_in++);
2055       in1 += *size++;
2056     }
2057   while (*++msg_nodes);
2058   msg_nodes=nodes;
2059 
2060   /* load gs values into in out gs buffers */
2061   while (*iptr >= 0)
2062     {*dptr3++ = *(in_vals + *iptr++);}
2063 
2064   /* load out buffers and post the sends */
2065   while ((iptr = *msg_nodes++))
2066     {
2067       dptr3 = dptr2;
2068       while (*iptr >= 0)
2069         {*dptr2++ = *(dptr1 + *iptr++);}
2070       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2071       /* is msg_ids_out++ correct? */
2072       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2073                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2074     }
2075 
2076   if (gs->max_left_over)
2077     {gs_gop_tree_max(gs,in_vals);}
2078 
2079   /* process the received data */
2080   msg_nodes=nodes;
2081   while ((iptr = *nodes++))
2082     {
2083       /* Should I check the return value of MPI_Wait() or status? */
2084       /* Can this loop be replaced by a call to MPI_Waitall()? */
2085       MPI_Wait(ids_in++, &status);
2086       while (*iptr >= 0)
2087         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2088     }
2089 
2090   /* replace vals */
2091   while (*pw >= 0)
2092     {*(in_vals + *pw++) = *dptr1++;}
2093 
2094   /* clear isend message handles */
2095   /* This changed for clarity though it could be the same */
2096   while (*msg_nodes++)
2097     /* Should I check the return value of MPI_Wait() or status? */
2098     /* Can this loop be replaced by a call to MPI_Waitall()? */
2099     {MPI_Wait(ids_out++, &status);}
2100 }
2101 
2102 
2103 
2104 /******************************************************************************
2105 Function: gather_scatter
2106 
2107 Input :
2108 Output:
2109 Return:
2110 Description:
2111 ******************************************************************************/
2112 static
2113 void
2114 gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2115 {
2116   int size;
2117   int *in, *out;
2118   PetscScalar *buf, *work;
2119 
2120   in   = gs->tree_map_in;
2121   out  = gs->tree_map_out;
2122   buf  = gs->tree_buf;
2123   work = gs->tree_work;
2124   size = gs->tree_nel;
2125 
2126   rvec_set(buf,-REAL_MAX,size);
2127 
2128   while (*in >= 0)
2129     {*(buf + *out++) = *(vals + *in++);}
2130 
2131   in   = gs->tree_map_in;
2132   out  = gs->tree_map_out;
2133   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2134   while (*in >= 0)
2135     {*(vals + *in++) = *(work + *out++);}
2136 
2137 }
2138 
2139 
2140 
2141 /******************************************************************************
2142 Function: gather_scatter
2143 
2144 Input :
2145 Output:
2146 Return:
2147 Description:
2148 ******************************************************************************/
2149 static void
2150 gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2151 {
2152 
2153   /* local only operations!!! */
2154   if (gs->num_local)
2155     {gs_gop_local_min_abs(gs,vals);}
2156 
2157   /* if intersection tree/pairwise and local isn't empty */
2158   if (gs->num_local_gop)
2159     {
2160       gs_gop_local_in_min_abs(gs,vals);
2161 
2162       /* pairwise */
2163       if (gs->num_pairs)
2164         {gs_gop_pairwise_min_abs(gs,vals);}
2165 
2166       /* tree */
2167       else if (gs->max_left_over)
2168         {gs_gop_tree_min_abs(gs,vals);}
2169 
2170       gs_gop_local_out(gs,vals);
2171     }
2172   /* if intersection tree/pairwise and local is empty */
2173   else
2174     {
2175       /* pairwise */
2176       if (gs->num_pairs)
2177         {gs_gop_pairwise_min_abs(gs,vals);}
2178 
2179       /* tree */
2180       else if (gs->max_left_over)
2181         {gs_gop_tree_min_abs(gs,vals);}
2182     }
2183 }
2184 
2185 
2186 
2187 /******************************************************************************
2188 Function: gather_scatter
2189 
2190 Input :
2191 Output:
2192 Return:
2193 Description:
2194 ******************************************************************************/
2195 static
2196 void
2197 gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2198 {
2199    int *num, *map, **reduce;
2200    PetscScalar tmp;
2201 
2202 
2203   num    = gs->num_local_reduce;
2204   reduce = gs->local_reduce;
2205   while ((map = *reduce))
2206     {
2207       num ++;
2208       tmp = REAL_MAX;
2209       while (*map >= 0)
2210         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2211 
2212       map = *reduce++;
2213       while (*map >= 0)
2214         {*(vals + *map++) = tmp;}
2215     }
2216 }
2217 
2218 
2219 
2220 /******************************************************************************
2221 Function: gather_scatter
2222 
2223 Input :
2224 Output:
2225 Return:
2226 Description:
2227 ******************************************************************************/
2228 static
2229 void
2230 gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2231 {
2232    int *num, *map, **reduce;
2233    PetscScalar *base;
2234 
2235   num    = gs->num_gop_local_reduce;
2236   reduce = gs->gop_local_reduce;
2237   while ((map = *reduce++))
2238     {
2239       num++;
2240       base = vals + *map++;
2241       while (*map >= 0)
2242         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2243     }
2244 }
2245 
2246 
2247 
2248 /******************************************************************************
2249 Function: gather_scatter
2250 
2251 VERSION 3 ::
2252 
2253 Input :
2254 Output:
2255 Return:
2256 Description:
2257 ******************************************************************************/
2258 static
2259 void
2260 gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2261 {
2262    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2263    int *iptr, *msg_list, *msg_size, **msg_nodes;
2264    int *pw, *list, *size, **nodes;
2265   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2266   MPI_Status status;
2267 
2268 
2269   /* strip and load s */
2270   msg_list =list         = gs->pair_list;
2271   msg_size =size         = gs->msg_sizes;
2272   msg_nodes=nodes        = gs->node_list;
2273   iptr=pw                = gs->pw_elm_list;
2274   dptr1=dptr3            = gs->pw_vals;
2275   msg_ids_in  = ids_in   = gs->msg_ids_in;
2276   msg_ids_out = ids_out  = gs->msg_ids_out;
2277   dptr2                  = gs->out;
2278   in1=in2                = gs->in;
2279 
2280   /* post the receives */
2281   /*  msg_nodes=nodes; */
2282   do
2283     {
2284       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2285          second one *list and do list++ afterwards */
2286       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2287                 gs->gs_comm, msg_ids_in++);
2288       in1 += *size++;
2289     }
2290   while (*++msg_nodes);
2291   msg_nodes=nodes;
2292 
2293   /* load gs values into in out gs buffers */
2294   while (*iptr >= 0)
2295     {*dptr3++ = *(in_vals + *iptr++);}
2296 
2297   /* load out buffers and post the sends */
2298   while ((iptr = *msg_nodes++))
2299     {
2300       dptr3 = dptr2;
2301       while (*iptr >= 0)
2302         {*dptr2++ = *(dptr1 + *iptr++);}
2303       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2304       /* is msg_ids_out++ correct? */
2305       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2306                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2307     }
2308 
2309   if (gs->max_left_over)
2310     {gs_gop_tree_min_abs(gs,in_vals);}
2311 
2312   /* process the received data */
2313   msg_nodes=nodes;
2314   while ((iptr = *nodes++))
2315     {
2316       /* Should I check the return value of MPI_Wait() or status? */
2317       /* Can this loop be replaced by a call to MPI_Waitall()? */
2318       MPI_Wait(ids_in++, &status);
2319       while (*iptr >= 0)
2320         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2321     }
2322 
2323   /* replace vals */
2324   while (*pw >= 0)
2325     {*(in_vals + *pw++) = *dptr1++;}
2326 
2327   /* clear isend message handles */
2328   /* This changed for clarity though it could be the same */
2329   while (*msg_nodes++)
2330     /* Should I check the return value of MPI_Wait() or status? */
2331     /* Can this loop be replaced by a call to MPI_Waitall()? */
2332     {MPI_Wait(ids_out++, &status);}
2333 }
2334 
2335 
2336 
2337 /******************************************************************************
2338 Function: gather_scatter
2339 
2340 Input :
2341 Output:
2342 Return:
2343 Description:
2344 ******************************************************************************/
2345 static
2346 void
2347 gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2348 {
2349   int size;
2350   int *in, *out;
2351   PetscScalar *buf, *work;
2352   int op[] = {GL_MIN_ABS,0};
2353 
2354 
2355   in   = gs->tree_map_in;
2356   out  = gs->tree_map_out;
2357   buf  = gs->tree_buf;
2358   work = gs->tree_work;
2359   size = gs->tree_nel;
2360 
2361   rvec_set(buf,REAL_MAX,size);
2362 
2363   while (*in >= 0)
2364     {*(buf + *out++) = *(vals + *in++);}
2365 
2366   in   = gs->tree_map_in;
2367   out  = gs->tree_map_out;
2368   grop(buf,work,size,op);
2369   while (*in >= 0)
2370     {*(vals + *in++) = *(buf + *out++);}
2371 
2372 }
2373 
2374 
2375 
2376 /******************************************************************************
2377 Function: gather_scatter
2378 
2379 Input :
2380 Output:
2381 Return:
2382 Description:
2383 ******************************************************************************/
2384 static void
2385 gs_gop_min( gs_id *gs,  PetscScalar *vals)
2386 {
2387 
2388   /* local only operations!!! */
2389   if (gs->num_local)
2390     {gs_gop_local_min(gs,vals);}
2391 
2392   /* if intersection tree/pairwise and local isn't empty */
2393   if (gs->num_local_gop)
2394     {
2395       gs_gop_local_in_min(gs,vals);
2396 
2397       /* pairwise */
2398       if (gs->num_pairs)
2399         {gs_gop_pairwise_min(gs,vals);}
2400 
2401       /* tree */
2402       else if (gs->max_left_over)
2403         {gs_gop_tree_min(gs,vals);}
2404 
2405       gs_gop_local_out(gs,vals);
2406     }
2407   /* if intersection tree/pairwise and local is empty */
2408   else
2409     {
2410       /* pairwise */
2411       if (gs->num_pairs)
2412         {gs_gop_pairwise_min(gs,vals);}
2413 
2414       /* tree */
2415       else if (gs->max_left_over)
2416         {gs_gop_tree_min(gs,vals);}
2417     }
2418 }
2419 
2420 
2421 
2422 /******************************************************************************
2423 Function: gather_scatter
2424 
2425 Input :
2426 Output:
2427 Return:
2428 Description:
2429 ******************************************************************************/
2430 static
2431 void
2432 gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2433 {
2434    int *num, *map, **reduce;
2435    PetscScalar tmp;
2436 
2437   num    = gs->num_local_reduce;
2438   reduce = gs->local_reduce;
2439   while ((map = *reduce))
2440     {
2441       num ++;
2442       tmp = REAL_MAX;
2443       while (*map >= 0)
2444         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2445 
2446       map = *reduce++;
2447       while (*map >= 0)
2448         {*(vals + *map++) = tmp;}
2449     }
2450 }
2451 
2452 
2453 
2454 /******************************************************************************
2455 Function: gather_scatter
2456 
2457 Input :
2458 Output:
2459 Return:
2460 Description:
2461 ******************************************************************************/
2462 static
2463 void
2464 gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2465 {
2466    int *num, *map, **reduce;
2467    PetscScalar *base;
2468 
2469   num    = gs->num_gop_local_reduce;
2470   reduce = gs->gop_local_reduce;
2471   while ((map = *reduce++))
2472     {
2473       num++;
2474       base = vals + *map++;
2475       while (*map >= 0)
2476         {*base = PetscMin(*base,*(vals + *map)); map++;}
2477     }
2478 }
2479 
2480 
2481 
2482 /******************************************************************************
2483 Function: gather_scatter
2484 
2485 VERSION 3 ::
2486 
2487 Input :
2488 Output:
2489 Return:
2490 Description:
2491 ******************************************************************************/
2492 static
2493 void
2494 gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2495 {
2496    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2497    int *iptr, *msg_list, *msg_size, **msg_nodes;
2498    int *pw, *list, *size, **nodes;
2499   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2500   MPI_Status status;
2501 
2502 
2503   /* strip and load s */
2504   msg_list =list         = gs->pair_list;
2505   msg_size =size         = gs->msg_sizes;
2506   msg_nodes=nodes        = gs->node_list;
2507   iptr=pw                = gs->pw_elm_list;
2508   dptr1=dptr3            = gs->pw_vals;
2509   msg_ids_in  = ids_in   = gs->msg_ids_in;
2510   msg_ids_out = ids_out  = gs->msg_ids_out;
2511   dptr2                  = gs->out;
2512   in1=in2                = gs->in;
2513 
2514   /* post the receives */
2515   /*  msg_nodes=nodes; */
2516   do
2517     {
2518       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2519          second one *list and do list++ afterwards */
2520       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2521                 gs->gs_comm, msg_ids_in++);
2522       in1 += *size++;
2523     }
2524   while (*++msg_nodes);
2525   msg_nodes=nodes;
2526 
2527   /* load gs values into in out gs buffers */
2528   while (*iptr >= 0)
2529     {*dptr3++ = *(in_vals + *iptr++);}
2530 
2531   /* load out buffers and post the sends */
2532   while ((iptr = *msg_nodes++))
2533     {
2534       dptr3 = dptr2;
2535       while (*iptr >= 0)
2536         {*dptr2++ = *(dptr1 + *iptr++);}
2537       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2538       /* is msg_ids_out++ correct? */
2539       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2540                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2541     }
2542 
2543   /* process the received data */
2544   if (gs->max_left_over)
2545     {gs_gop_tree_min(gs,in_vals);}
2546 
2547   msg_nodes=nodes;
2548   while ((iptr = *nodes++))
2549     {
2550       /* Should I check the return value of MPI_Wait() or status? */
2551       /* Can this loop be replaced by a call to MPI_Waitall()? */
2552       MPI_Wait(ids_in++, &status);
2553       while (*iptr >= 0)
2554         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2555     }
2556 
2557   /* replace vals */
2558   while (*pw >= 0)
2559     {*(in_vals + *pw++) = *dptr1++;}
2560 
2561   /* clear isend message handles */
2562   /* This changed for clarity though it could be the same */
2563   while (*msg_nodes++)
2564     /* Should I check the return value of MPI_Wait() or status? */
2565     /* Can this loop be replaced by a call to MPI_Waitall()? */
2566     {MPI_Wait(ids_out++, &status);}
2567 }
2568 
2569 
2570 
2571 /******************************************************************************
2572 Function: gather_scatter
2573 
2574 Input :
2575 Output:
2576 Return:
2577 Description:
2578 ******************************************************************************/
2579 static
2580 void
2581 gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2582 {
2583   int size;
2584   int *in, *out;
2585   PetscScalar *buf, *work;
2586 
2587   in   = gs->tree_map_in;
2588   out  = gs->tree_map_out;
2589   buf  = gs->tree_buf;
2590   work = gs->tree_work;
2591   size = gs->tree_nel;
2592 
2593   rvec_set(buf,REAL_MAX,size);
2594 
2595   while (*in >= 0)
2596     {*(buf + *out++) = *(vals + *in++);}
2597 
2598   in   = gs->tree_map_in;
2599   out  = gs->tree_map_out;
2600   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2601   while (*in >= 0)
2602     {*(vals + *in++) = *(work + *out++);}
2603 }
2604 
2605 
2606 
2607 /******************************************************************************
2608 Function: gather_scatter
2609 
2610 Input :
2611 Output:
2612 Return:
2613 Description:
2614 ******************************************************************************/
2615 static void
2616 gs_gop_times( gs_id *gs,  PetscScalar *vals)
2617 {
2618 
2619   /* local only operations!!! */
2620   if (gs->num_local)
2621     {gs_gop_local_times(gs,vals);}
2622 
2623   /* if intersection tree/pairwise and local isn't empty */
2624   if (gs->num_local_gop)
2625     {
2626       gs_gop_local_in_times(gs,vals);
2627 
2628       /* pairwise */
2629       if (gs->num_pairs)
2630         {gs_gop_pairwise_times(gs,vals);}
2631 
2632       /* tree */
2633       else if (gs->max_left_over)
2634         {gs_gop_tree_times(gs,vals);}
2635 
2636       gs_gop_local_out(gs,vals);
2637     }
2638   /* if intersection tree/pairwise and local is empty */
2639   else
2640     {
2641       /* pairwise */
2642       if (gs->num_pairs)
2643         {gs_gop_pairwise_times(gs,vals);}
2644 
2645       /* tree */
2646       else if (gs->max_left_over)
2647         {gs_gop_tree_times(gs,vals);}
2648     }
2649 }
2650 
2651 
2652 
2653 /******************************************************************************
2654 Function: gather_scatter
2655 
2656 Input :
2657 Output:
2658 Return:
2659 Description:
2660 ******************************************************************************/
2661 static
2662 void
2663 gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2664 {
2665    int *num, *map, **reduce;
2666    PetscScalar tmp;
2667 
2668   num    = gs->num_local_reduce;
2669   reduce = gs->local_reduce;
2670   while ((map = *reduce))
2671     {
2672       /* wall */
2673       if (*num == 2)
2674         {
2675           num ++; reduce++;
2676           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2677         }
2678       /* corner shared by three elements */
2679       else if (*num == 3)
2680         {
2681           num ++; reduce++;
2682           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2683         }
2684       /* corner shared by four elements */
2685       else if (*num == 4)
2686         {
2687           num ++; reduce++;
2688           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2689                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2690         }
2691       /* general case ... odd geoms ... 3D*/
2692       else
2693         {
2694           num ++;
2695           tmp = 1.0;
2696           while (*map >= 0)
2697             {tmp *= *(vals + *map++);}
2698 
2699           map = *reduce++;
2700           while (*map >= 0)
2701             {*(vals + *map++) = tmp;}
2702         }
2703     }
2704 }
2705 
2706 
2707 
2708 /******************************************************************************
2709 Function: gather_scatter
2710 
2711 Input :
2712 Output:
2713 Return:
2714 Description:
2715 ******************************************************************************/
2716 static
2717 void
2718 gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2719 {
2720    int *num, *map, **reduce;
2721    PetscScalar *base;
2722 
2723   num    = gs->num_gop_local_reduce;
2724   reduce = gs->gop_local_reduce;
2725   while ((map = *reduce++))
2726     {
2727       /* wall */
2728       if (*num == 2)
2729         {
2730           num ++;
2731           vals[map[0]] *= vals[map[1]];
2732         }
2733       /* corner shared by three elements */
2734       else if (*num == 3)
2735         {
2736           num ++;
2737           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2738         }
2739       /* corner shared by four elements */
2740       else if (*num == 4)
2741         {
2742           num ++;
2743           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2744         }
2745       /* general case ... odd geoms ... 3D*/
2746       else
2747         {
2748           num++;
2749           base = vals + *map++;
2750           while (*map >= 0)
2751             {*base *= *(vals + *map++);}
2752         }
2753     }
2754 }
2755 
2756 
2757 
2758 /******************************************************************************
2759 Function: gather_scatter
2760 
2761 VERSION 3 ::
2762 
2763 Input :
2764 Output:
2765 Return:
2766 Description:
2767 ******************************************************************************/
2768 static
2769 void
2770 gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2771 {
2772    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2773    int *iptr, *msg_list, *msg_size, **msg_nodes;
2774    int *pw, *list, *size, **nodes;
2775   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2776   MPI_Status status;
2777 
2778 
2779   /* strip and load s */
2780   msg_list =list         = gs->pair_list;
2781   msg_size =size         = gs->msg_sizes;
2782   msg_nodes=nodes        = gs->node_list;
2783   iptr=pw                = gs->pw_elm_list;
2784   dptr1=dptr3            = gs->pw_vals;
2785   msg_ids_in  = ids_in   = gs->msg_ids_in;
2786   msg_ids_out = ids_out  = gs->msg_ids_out;
2787   dptr2                  = gs->out;
2788   in1=in2                = gs->in;
2789 
2790   /* post the receives */
2791   /*  msg_nodes=nodes; */
2792   do
2793     {
2794       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2795          second one *list and do list++ afterwards */
2796       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2797                 gs->gs_comm, msg_ids_in++);
2798       in1 += *size++;
2799     }
2800   while (*++msg_nodes);
2801   msg_nodes=nodes;
2802 
2803   /* load gs values into in out gs buffers */
2804   while (*iptr >= 0)
2805     {*dptr3++ = *(in_vals + *iptr++);}
2806 
2807   /* load out buffers and post the sends */
2808   while ((iptr = *msg_nodes++))
2809     {
2810       dptr3 = dptr2;
2811       while (*iptr >= 0)
2812         {*dptr2++ = *(dptr1 + *iptr++);}
2813       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2814       /* is msg_ids_out++ correct? */
2815       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2816                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2817     }
2818 
2819   if (gs->max_left_over)
2820     {gs_gop_tree_times(gs,in_vals);}
2821 
2822   /* process the received data */
2823   msg_nodes=nodes;
2824   while ((iptr = *nodes++))
2825     {
2826       /* Should I check the return value of MPI_Wait() or status? */
2827       /* Can this loop be replaced by a call to MPI_Waitall()? */
2828       MPI_Wait(ids_in++, &status);
2829       while (*iptr >= 0)
2830         {*(dptr1 + *iptr++) *= *in2++;}
2831     }
2832 
2833   /* replace vals */
2834   while (*pw >= 0)
2835     {*(in_vals + *pw++) = *dptr1++;}
2836 
2837   /* clear isend message handles */
2838   /* This changed for clarity though it could be the same */
2839   while (*msg_nodes++)
2840     /* Should I check the return value of MPI_Wait() or status? */
2841     /* Can this loop be replaced by a call to MPI_Waitall()? */
2842     {MPI_Wait(ids_out++, &status);}
2843 }
2844 
2845 
2846 
2847 /******************************************************************************
2848 Function: gather_scatter
2849 
2850 Input :
2851 Output:
2852 Return:
2853 Description:
2854 ******************************************************************************/
2855 static
2856 void
2857 gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2858 {
2859   int size;
2860   int *in, *out;
2861   PetscScalar *buf, *work;
2862 
2863   in   = gs->tree_map_in;
2864   out  = gs->tree_map_out;
2865   buf  = gs->tree_buf;
2866   work = gs->tree_work;
2867   size = gs->tree_nel;
2868 
2869   rvec_one(buf,size);
2870 
2871   while (*in >= 0)
2872     {*(buf + *out++) = *(vals + *in++);}
2873 
2874   in   = gs->tree_map_in;
2875   out  = gs->tree_map_out;
2876   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2877   while (*in >= 0)
2878     {*(vals + *in++) = *(work + *out++);}
2879 
2880 }
2881 
2882 
2883 
2884 /******************************************************************************
2885 Function: gather_scatter
2886 
2887 
2888 Input :
2889 Output:
2890 Return:
2891 Description:
2892 ******************************************************************************/
2893 static void
2894 gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2895 {
2896 
2897   /* local only operations!!! */
2898   if (gs->num_local)
2899     {gs_gop_local_plus(gs,vals);}
2900 
2901   /* if intersection tree/pairwise and local isn't empty */
2902   if (gs->num_local_gop)
2903     {
2904       gs_gop_local_in_plus(gs,vals);
2905 
2906       /* pairwise will NOT do tree inside ... */
2907       if (gs->num_pairs)
2908         {gs_gop_pairwise_plus(gs,vals);}
2909 
2910       /* tree */
2911       if (gs->max_left_over)
2912         {gs_gop_tree_plus(gs,vals);}
2913 
2914       gs_gop_local_out(gs,vals);
2915     }
2916   /* if intersection tree/pairwise and local is empty */
2917   else
2918     {
2919       /* pairwise will NOT do tree inside */
2920       if (gs->num_pairs)
2921         {gs_gop_pairwise_plus(gs,vals);}
2922 
2923       /* tree */
2924       if (gs->max_left_over)
2925         {gs_gop_tree_plus(gs,vals);}
2926     }
2927 
2928 }
2929 
2930 
2931 
2932 /******************************************************************************
2933 Function: gather_scatter
2934 
2935 Input :
2936 Output:
2937 Return:
2938 Description:
2939 ******************************************************************************/
2940 static
2941 void
2942 gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2943 {
2944    int *num, *map, **reduce;
2945    PetscScalar tmp;
2946 
2947 
2948   num    = gs->num_local_reduce;
2949   reduce = gs->local_reduce;
2950   while ((map = *reduce))
2951     {
2952       /* wall */
2953       if (*num == 2)
2954         {
2955           num ++; reduce++;
2956           vals[map[1]] = vals[map[0]] += vals[map[1]];
2957         }
2958       /* corner shared by three elements */
2959       else if (*num == 3)
2960         {
2961           num ++; reduce++;
2962           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2963         }
2964       /* corner shared by four elements */
2965       else if (*num == 4)
2966         {
2967           num ++; reduce++;
2968           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2969                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2970         }
2971       /* general case ... odd geoms ... 3D*/
2972       else
2973         {
2974           num ++;
2975           tmp = 0.0;
2976           while (*map >= 0)
2977             {tmp += *(vals + *map++);}
2978 
2979           map = *reduce++;
2980           while (*map >= 0)
2981             {*(vals + *map++) = tmp;}
2982         }
2983     }
2984 }
2985 
2986 
2987 
2988 /******************************************************************************
2989 Function: gather_scatter
2990 
2991 Input :
2992 Output:
2993 Return:
2994 Description:
2995 ******************************************************************************/
2996 static
2997 void
2998 gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2999 {
3000    int *num, *map, **reduce;
3001    PetscScalar *base;
3002 
3003 
3004   num    = gs->num_gop_local_reduce;
3005   reduce = gs->gop_local_reduce;
3006   while ((map = *reduce++))
3007     {
3008       /* wall */
3009       if (*num == 2)
3010         {
3011           num ++;
3012           vals[map[0]] += vals[map[1]];
3013         }
3014       /* corner shared by three elements */
3015       else if (*num == 3)
3016         {
3017           num ++;
3018           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3019         }
3020       /* corner shared by four elements */
3021       else if (*num == 4)
3022         {
3023           num ++;
3024           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3025         }
3026       /* general case ... odd geoms ... 3D*/
3027       else
3028         {
3029           num++;
3030           base = vals + *map++;
3031           while (*map >= 0)
3032             {*base += *(vals + *map++);}
3033         }
3034     }
3035 }
3036 
3037 
3038 
3039 /******************************************************************************
3040 Function: gather_scatter
3041 
3042 VERSION 3 ::
3043 
3044 Input :
3045 Output:
3046 Return:
3047 Description:
3048 ******************************************************************************/
3049 static
3050 void
3051 gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3052 {
3053    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3054    int *iptr, *msg_list, *msg_size, **msg_nodes;
3055    int *pw, *list, *size, **nodes;
3056   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3057   MPI_Status status;
3058 
3059 
3060   /* strip and load s */
3061   msg_list =list         = gs->pair_list;
3062   msg_size =size         = gs->msg_sizes;
3063   msg_nodes=nodes        = gs->node_list;
3064   iptr=pw                = gs->pw_elm_list;
3065   dptr1=dptr3            = gs->pw_vals;
3066   msg_ids_in  = ids_in   = gs->msg_ids_in;
3067   msg_ids_out = ids_out  = gs->msg_ids_out;
3068   dptr2                  = gs->out;
3069   in1=in2                = gs->in;
3070 
3071   /* post the receives */
3072   /*  msg_nodes=nodes; */
3073   do
3074     {
3075       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3076          second one *list and do list++ afterwards */
3077       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3078                 gs->gs_comm, msg_ids_in++);
3079       in1 += *size++;
3080     }
3081   while (*++msg_nodes);
3082   msg_nodes=nodes;
3083 
3084   /* load gs values into in out gs buffers */
3085   while (*iptr >= 0)
3086     {*dptr3++ = *(in_vals + *iptr++);}
3087 
3088   /* load out buffers and post the sends */
3089   while ((iptr = *msg_nodes++))
3090     {
3091       dptr3 = dptr2;
3092       while (*iptr >= 0)
3093         {*dptr2++ = *(dptr1 + *iptr++);}
3094       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3095       /* is msg_ids_out++ correct? */
3096       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3097                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3098     }
3099 
3100   /* do the tree while we're waiting */
3101   if (gs->max_left_over)
3102     {gs_gop_tree_plus(gs,in_vals);}
3103 
3104   /* process the received data */
3105   msg_nodes=nodes;
3106   while ((iptr = *nodes++))
3107     {
3108       /* Should I check the return value of MPI_Wait() or status? */
3109       /* Can this loop be replaced by a call to MPI_Waitall()? */
3110       MPI_Wait(ids_in++, &status);
3111       while (*iptr >= 0)
3112         {*(dptr1 + *iptr++) += *in2++;}
3113     }
3114 
3115   /* replace vals */
3116   while (*pw >= 0)
3117     {*(in_vals + *pw++) = *dptr1++;}
3118 
3119   /* clear isend message handles */
3120   /* This changed for clarity though it could be the same */
3121   while (*msg_nodes++)
3122     /* Should I check the return value of MPI_Wait() or status? */
3123     /* Can this loop be replaced by a call to MPI_Waitall()? */
3124     {MPI_Wait(ids_out++, &status);}
3125 
3126 }
3127 
3128 
3129 
3130 /******************************************************************************
3131 Function: gather_scatter
3132 
3133 Input :
3134 Output:
3135 Return:
3136 Description:
3137 ******************************************************************************/
3138 static
3139 void
3140 gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3141 {
3142   int size;
3143   int *in, *out;
3144   PetscScalar *buf, *work;
3145 
3146   in   = gs->tree_map_in;
3147   out  = gs->tree_map_out;
3148   buf  = gs->tree_buf;
3149   work = gs->tree_work;
3150   size = gs->tree_nel;
3151 
3152   rvec_zero(buf,size);
3153 
3154   while (*in >= 0)
3155     {*(buf + *out++) = *(vals + *in++);}
3156 
3157   in   = gs->tree_map_in;
3158   out  = gs->tree_map_out;
3159   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3160   while (*in >= 0)
3161     {*(vals + *in++) = *(work + *out++);}
3162 
3163 }
3164 
3165 /******************************************************************************
3166 Function: gs_free()
3167 
3168 Input :
3169 
3170 Output:
3171 
3172 Return:
3173 
3174 Description:
3175   if (gs->sss) {free((void*) gs->sss);}
3176 ******************************************************************************/
3177 void
3178 gs_free( gs_id *gs)
3179 {
3180    int i;
3181 
3182 
3183   if (gs->nghs) {free((void*) gs->nghs);}
3184   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3185 
3186   /* tree */
3187   if (gs->max_left_over)
3188     {
3189       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3190       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3191       if (gs->tree_work) {free((void*) gs->tree_work);}
3192       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3193       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3194     }
3195 
3196   /* pairwise info */
3197   if (gs->num_pairs)
3198     {
3199       /* should be NULL already */
3200       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3201       if (gs->elms) {free((void*) gs->elms);}
3202       if (gs->local_elms) {free((void*) gs->local_elms);}
3203       if (gs->companion) {free((void*) gs->companion);}
3204 
3205       /* only set if pairwise */
3206       if (gs->vals) {free((void*) gs->vals);}
3207       if (gs->in) {free((void*) gs->in);}
3208       if (gs->out) {free((void*) gs->out);}
3209       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3210       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3211       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3212       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3213       if (gs->node_list)
3214         {
3215           for (i=0;i<gs->num_pairs;i++)
3216             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3217           free((void*) gs->node_list);
3218         }
3219       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3220       if (gs->pair_list) {free((void*) gs->pair_list);}
3221     }
3222 
3223   /* local info */
3224   if (gs->num_local_total>=0)
3225     {
3226       for (i=0;i<gs->num_local_total+1;i++)
3227         /*      for (i=0;i<gs->num_local_total;i++) */
3228         {
3229           if (gs->num_gop_local_reduce[i])
3230             {free((void*) gs->gop_local_reduce[i]);}
3231         }
3232     }
3233 
3234   /* if intersection tree/pairwise and local isn't empty */
3235   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3236   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3237 
3238   free((void*) gs);
3239 }
3240 
3241 
3242 
3243 
3244 
3245 
3246 /******************************************************************************
3247 Function: gather_scatter
3248 
3249 Input :
3250 Output:
3251 Return:
3252 Description:
3253 ******************************************************************************/
3254 void
3255 gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3256 {
3257 
3258   switch (*op) {
3259   case '+':
3260     gs_gop_vec_plus(gs,vals,step);
3261     break;
3262   default:
3263     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3264     error_msg_warning("gs_gop_vec() :: default :: plus");
3265     gs_gop_vec_plus(gs,vals,step);
3266     break;
3267   }
3268 }
3269 
3270 
3271 
3272 /******************************************************************************
3273 Function: gather_scatter
3274 
3275 Input :
3276 Output:
3277 Return:
3278 Description:
3279 ******************************************************************************/
3280 static void
3281 gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3282 {
3283   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3284 
3285   /* local only operations!!! */
3286   if (gs->num_local)
3287     {gs_gop_vec_local_plus(gs,vals,step);}
3288 
3289   /* if intersection tree/pairwise and local isn't empty */
3290   if (gs->num_local_gop)
3291     {
3292       gs_gop_vec_local_in_plus(gs,vals,step);
3293 
3294       /* pairwise */
3295       if (gs->num_pairs)
3296         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3297 
3298       /* tree */
3299       else if (gs->max_left_over)
3300         {gs_gop_vec_tree_plus(gs,vals,step);}
3301 
3302       gs_gop_vec_local_out(gs,vals,step);
3303     }
3304   /* if intersection tree/pairwise and local is empty */
3305   else
3306     {
3307       /* pairwise */
3308       if (gs->num_pairs)
3309         {gs_gop_vec_pairwise_plus(gs,vals,step);}
3310 
3311       /* tree */
3312       else if (gs->max_left_over)
3313         {gs_gop_vec_tree_plus(gs,vals,step);}
3314     }
3315 }
3316 
3317 
3318 
3319 /******************************************************************************
3320 Function: gather_scatter
3321 
3322 Input :
3323 Output:
3324 Return:
3325 Description:
3326 ******************************************************************************/
3327 static
3328 void
3329 gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals,
3330                        int step)
3331 {
3332    int *num, *map, **reduce;
3333    PetscScalar *base;
3334 
3335 
3336   num    = gs->num_local_reduce;
3337   reduce = gs->local_reduce;
3338   while ((map = *reduce))
3339     {
3340       base = vals + map[0] * step;
3341 
3342       /* wall */
3343       if (*num == 2)
3344         {
3345           num++; reduce++;
3346           rvec_add (base,vals+map[1]*step,step);
3347           rvec_copy(vals+map[1]*step,base,step);
3348         }
3349       /* corner shared by three elements */
3350       else if (*num == 3)
3351         {
3352           num++; reduce++;
3353           rvec_add (base,vals+map[1]*step,step);
3354           rvec_add (base,vals+map[2]*step,step);
3355           rvec_copy(vals+map[2]*step,base,step);
3356           rvec_copy(vals+map[1]*step,base,step);
3357         }
3358       /* corner shared by four elements */
3359       else if (*num == 4)
3360         {
3361           num++; reduce++;
3362           rvec_add (base,vals+map[1]*step,step);
3363           rvec_add (base,vals+map[2]*step,step);
3364           rvec_add (base,vals+map[3]*step,step);
3365           rvec_copy(vals+map[3]*step,base,step);
3366           rvec_copy(vals+map[2]*step,base,step);
3367           rvec_copy(vals+map[1]*step,base,step);
3368         }
3369       /* general case ... odd geoms ... 3D */
3370       else
3371         {
3372           num++;
3373           while (*++map >= 0)
3374             {rvec_add (base,vals+*map*step,step);}
3375 
3376           map = *reduce;
3377           while (*++map >= 0)
3378             {rvec_copy(vals+*map*step,base,step);}
3379 
3380           reduce++;
3381         }
3382     }
3383 }
3384 
3385 
3386 
3387 /******************************************************************************
3388 Function: gather_scatter
3389 
3390 Input :
3391 Output:
3392 Return:
3393 Description:
3394 ******************************************************************************/
3395 static
3396 void
3397 gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals,
3398                           int step)
3399 {
3400    int  *num, *map, **reduce;
3401    PetscScalar *base;
3402 
3403   num    = gs->num_gop_local_reduce;
3404   reduce = gs->gop_local_reduce;
3405   while ((map = *reduce++))
3406     {
3407       base = vals + map[0] * step;
3408 
3409       /* wall */
3410       if (*num == 2)
3411         {
3412           num ++;
3413           rvec_add(base,vals+map[1]*step,step);
3414         }
3415       /* corner shared by three elements */
3416       else if (*num == 3)
3417         {
3418           num ++;
3419           rvec_add(base,vals+map[1]*step,step);
3420           rvec_add(base,vals+map[2]*step,step);
3421         }
3422       /* corner shared by four elements */
3423       else if (*num == 4)
3424         {
3425           num ++;
3426           rvec_add(base,vals+map[1]*step,step);
3427           rvec_add(base,vals+map[2]*step,step);
3428           rvec_add(base,vals+map[3]*step,step);
3429         }
3430       /* general case ... odd geoms ... 3D*/
3431       else
3432         {
3433           num++;
3434           while (*++map >= 0)
3435             {rvec_add(base,vals+*map*step,step);}
3436         }
3437     }
3438 }
3439 
3440 
3441 /******************************************************************************
3442 Function: gather_scatter
3443 
3444 Input :
3445 Output:
3446 Return:
3447 Description:
3448 ******************************************************************************/
3449 static
3450 void
3451 gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals,
3452                       int step)
3453 {
3454    int *num, *map, **reduce;
3455    PetscScalar *base;
3456 
3457 
3458   num    = gs->num_gop_local_reduce;
3459   reduce = gs->gop_local_reduce;
3460   while ((map = *reduce++))
3461     {
3462       base = vals + map[0] * step;
3463 
3464       /* wall */
3465       if (*num == 2)
3466         {
3467           num ++;
3468           rvec_copy(vals+map[1]*step,base,step);
3469         }
3470       /* corner shared by three elements */
3471       else if (*num == 3)
3472         {
3473           num ++;
3474           rvec_copy(vals+map[1]*step,base,step);
3475           rvec_copy(vals+map[2]*step,base,step);
3476         }
3477       /* corner shared by four elements */
3478       else if (*num == 4)
3479         {
3480           num ++;
3481           rvec_copy(vals+map[1]*step,base,step);
3482           rvec_copy(vals+map[2]*step,base,step);
3483           rvec_copy(vals+map[3]*step,base,step);
3484         }
3485       /* general case ... odd geoms ... 3D*/
3486       else
3487         {
3488           num++;
3489           while (*++map >= 0)
3490             {rvec_copy(vals+*map*step,base,step);}
3491         }
3492     }
3493 }
3494 
3495 
3496 
3497 /******************************************************************************
3498 Function: gather_scatter
3499 
3500 VERSION 3 ::
3501 
3502 Input :
3503 Output:
3504 Return:
3505 Description:
3506 ******************************************************************************/
3507 static
3508 void
3509 gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3510                           int step)
3511 {
3512    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3513    int *iptr, *msg_list, *msg_size, **msg_nodes;
3514    int *pw, *list, *size, **nodes;
3515   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3516   MPI_Status status;
3517   PetscBLASInt i1;
3518 
3519 
3520   /* strip and load s */
3521   msg_list =list         = gs->pair_list;
3522   msg_size =size         = gs->msg_sizes;
3523   msg_nodes=nodes        = gs->node_list;
3524   iptr=pw                = gs->pw_elm_list;
3525   dptr1=dptr3            = gs->pw_vals;
3526   msg_ids_in  = ids_in   = gs->msg_ids_in;
3527   msg_ids_out = ids_out  = gs->msg_ids_out;
3528   dptr2                  = gs->out;
3529   in1=in2                = gs->in;
3530 
3531   /* post the receives */
3532   /*  msg_nodes=nodes; */
3533   do
3534     {
3535       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3536          second one *list and do list++ afterwards */
3537       MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3538                 gs->gs_comm, msg_ids_in++);
3539       in1 += *size++ *step;
3540     }
3541   while (*++msg_nodes);
3542   msg_nodes=nodes;
3543 
3544   /* load gs values into in out gs buffers */
3545   while (*iptr >= 0)
3546     {
3547       rvec_copy(dptr3,in_vals + *iptr*step,step);
3548       dptr3+=step;
3549       iptr++;
3550     }
3551 
3552   /* load out buffers and post the sends */
3553   while ((iptr = *msg_nodes++))
3554     {
3555       dptr3 = dptr2;
3556       while (*iptr >= 0)
3557         {
3558           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3559           dptr2+=step;
3560           iptr++;
3561         }
3562       MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3563                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3564     }
3565 
3566   /* tree */
3567   if (gs->max_left_over)
3568     {gs_gop_vec_tree_plus(gs,in_vals,step);}
3569 
3570   /* process the received data */
3571   msg_nodes=nodes;
3572   while ((iptr = *nodes++)){
3573     PetscScalar d1 = 1.0;
3574       /* Should I check the return value of MPI_Wait() or status? */
3575       /* Can this loop be replaced by a call to MPI_Waitall()? */
3576       MPI_Wait(ids_in++, &status);
3577       while (*iptr >= 0) {
3578           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3579           in2+=step;
3580           iptr++;
3581       }
3582   }
3583 
3584   /* replace vals */
3585   while (*pw >= 0)
3586     {
3587       rvec_copy(in_vals + *pw*step,dptr1,step);
3588       dptr1+=step;
3589       pw++;
3590     }
3591 
3592   /* clear isend message handles */
3593   /* This changed for clarity though it could be the same */
3594   while (*msg_nodes++)
3595     /* Should I check the return value of MPI_Wait() or status? */
3596     /* Can this loop be replaced by a call to MPI_Waitall()? */
3597     {MPI_Wait(ids_out++, &status);}
3598 
3599 
3600 }
3601 
3602 
3603 
3604 /******************************************************************************
3605 Function: gather_scatter
3606 
3607 Input :
3608 Output:
3609 Return:
3610 Description:
3611 ******************************************************************************/
3612 static
3613 void
3614 gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3615 {
3616   int size, *in, *out;
3617   PetscScalar *buf, *work;
3618   int op[] = {GL_ADD,0};
3619   PetscBLASInt i1 = 1;
3620 
3621 
3622   /* copy over to local variables */
3623   in   = gs->tree_map_in;
3624   out  = gs->tree_map_out;
3625   buf  = gs->tree_buf;
3626   work = gs->tree_work;
3627   size = gs->tree_nel*step;
3628 
3629   /* zero out collection buffer */
3630   rvec_zero(buf,size);
3631 
3632 
3633   /* copy over my contributions */
3634   while (*in >= 0)
3635     {
3636       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3637     }
3638 
3639   /* perform fan in/out on full buffer */
3640   /* must change grop to handle the blas */
3641   grop(buf,work,size,op);
3642 
3643   /* reset */
3644   in   = gs->tree_map_in;
3645   out  = gs->tree_map_out;
3646 
3647   /* get the portion of the results I need */
3648   while (*in >= 0)
3649     {
3650       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3651     }
3652 
3653 }
3654 
3655 
3656 
3657 /******************************************************************************
3658 Function: gather_scatter
3659 
3660 Input :
3661 Output:
3662 Return:
3663 Description:
3664 ******************************************************************************/
3665 void
3666 gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3667 {
3668 
3669   switch (*op) {
3670   case '+':
3671     gs_gop_plus_hc(gs,vals,dim);
3672     break;
3673   default:
3674     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3675     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3676     gs_gop_plus_hc(gs,vals,dim);
3677     break;
3678   }
3679 }
3680 
3681 
3682 
3683 /******************************************************************************
3684 Function: gather_scatter
3685 
3686 Input :
3687 Output:
3688 Return:
3689 Description:
3690 ******************************************************************************/
3691 static void
3692 gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3693 {
3694   /* if there's nothing to do return */
3695   if (dim<=0)
3696     {return;}
3697 
3698   /* can't do more dimensions then exist */
3699   dim = PetscMin(dim,i_log2_num_nodes);
3700 
3701   /* local only operations!!! */
3702   if (gs->num_local)
3703     {gs_gop_local_plus(gs,vals);}
3704 
3705   /* if intersection tree/pairwise and local isn't empty */
3706   if (gs->num_local_gop)
3707     {
3708       gs_gop_local_in_plus(gs,vals);
3709 
3710       /* pairwise will do tree inside ... */
3711       if (gs->num_pairs)
3712         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3713 
3714       /* tree only */
3715       else if (gs->max_left_over)
3716         {gs_gop_tree_plus_hc(gs,vals,dim);}
3717 
3718       gs_gop_local_out(gs,vals);
3719     }
3720   /* if intersection tree/pairwise and local is empty */
3721   else
3722     {
3723       /* pairwise will do tree inside */
3724       if (gs->num_pairs)
3725         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3726 
3727       /* tree */
3728       else if (gs->max_left_over)
3729         {gs_gop_tree_plus_hc(gs,vals,dim);}
3730     }
3731 
3732 }
3733 
3734 
3735 /******************************************************************************
3736 VERSION 3 ::
3737 
3738 Input :
3739 Output:
3740 Return:
3741 Description:
3742 ******************************************************************************/
3743 static
3744 void
3745 gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3746 {
3747    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3748    int *iptr, *msg_list, *msg_size, **msg_nodes;
3749    int *pw, *list, *size, **nodes;
3750   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3751   MPI_Status status;
3752   int i, mask=1;
3753 
3754   for (i=1; i<dim; i++)
3755     {mask<<=1; mask++;}
3756 
3757 
3758   /* strip and load s */
3759   msg_list =list         = gs->pair_list;
3760   msg_size =size         = gs->msg_sizes;
3761   msg_nodes=nodes        = gs->node_list;
3762   iptr=pw                = gs->pw_elm_list;
3763   dptr1=dptr3            = gs->pw_vals;
3764   msg_ids_in  = ids_in   = gs->msg_ids_in;
3765   msg_ids_out = ids_out  = gs->msg_ids_out;
3766   dptr2                  = gs->out;
3767   in1=in2                = gs->in;
3768 
3769   /* post the receives */
3770   /*  msg_nodes=nodes; */
3771   do
3772     {
3773       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3774          second one *list and do list++ afterwards */
3775       if ((my_id|mask)==(*list|mask))
3776         {
3777           MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3778                     gs->gs_comm, msg_ids_in++);
3779           in1 += *size++;
3780         }
3781       else
3782         {list++; size++;}
3783     }
3784   while (*++msg_nodes);
3785 
3786   /* load gs values into in out gs buffers */
3787   while (*iptr >= 0)
3788     {*dptr3++ = *(in_vals + *iptr++);}
3789 
3790   /* load out buffers and post the sends */
3791   msg_nodes=nodes;
3792   list = msg_list;
3793   while ((iptr = *msg_nodes++))
3794     {
3795       if ((my_id|mask)==(*list|mask))
3796         {
3797           dptr3 = dptr2;
3798           while (*iptr >= 0)
3799             {*dptr2++ = *(dptr1 + *iptr++);}
3800           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3801           /* is msg_ids_out++ correct? */
3802           MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3803                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3804         }
3805       else
3806         {list++; msg_size++;}
3807     }
3808 
3809   /* do the tree while we're waiting */
3810   if (gs->max_left_over)
3811     {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3812 
3813   /* process the received data */
3814   msg_nodes=nodes;
3815   list = msg_list;
3816   while ((iptr = *nodes++))
3817     {
3818       if ((my_id|mask)==(*list|mask))
3819         {
3820           /* Should I check the return value of MPI_Wait() or status? */
3821           /* Can this loop be replaced by a call to MPI_Waitall()? */
3822           MPI_Wait(ids_in++, &status);
3823           while (*iptr >= 0)
3824             {*(dptr1 + *iptr++) += *in2++;}
3825         }
3826       list++;
3827     }
3828 
3829   /* replace vals */
3830   while (*pw >= 0)
3831     {*(in_vals + *pw++) = *dptr1++;}
3832 
3833   /* clear isend message handles */
3834   /* This changed for clarity though it could be the same */
3835   while (*msg_nodes++)
3836     {
3837       if ((my_id|mask)==(*msg_list|mask))
3838         {
3839           /* Should I check the return value of MPI_Wait() or status? */
3840           /* Can this loop be replaced by a call to MPI_Waitall()? */
3841           MPI_Wait(ids_out++, &status);
3842         }
3843       msg_list++;
3844     }
3845 
3846 
3847 }
3848 
3849 
3850 
3851 /******************************************************************************
3852 Function: gather_scatter
3853 
3854 Input :
3855 Output:
3856 Return:
3857 Description:
3858 ******************************************************************************/
3859 static
3860 void
3861 gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3862 {
3863   int size;
3864   int *in, *out;
3865   PetscScalar *buf, *work;
3866   int op[] = {GL_ADD,0};
3867 
3868   in   = gs->tree_map_in;
3869   out  = gs->tree_map_out;
3870   buf  = gs->tree_buf;
3871   work = gs->tree_work;
3872   size = gs->tree_nel;
3873 
3874   rvec_zero(buf,size);
3875 
3876   while (*in >= 0)
3877     {*(buf + *out++) = *(vals + *in++);}
3878 
3879   in   = gs->tree_map_in;
3880   out  = gs->tree_map_out;
3881 
3882   grop_hc(buf,work,size,op,dim);
3883 
3884   while (*in >= 0)
3885     {*(vals + *in++) = *(buf + *out++);}
3886 
3887 }
3888 
3889 
3890 
3891