1*21789920SBarry Smith /*
2827bd09bSSatish Balay
3827bd09bSSatish Balay Author: Henry M. Tufo III
4827bd09bSSatish Balay
5827bd09bSSatish Balay e-mail: hmt@cs.brown.edu
6827bd09bSSatish Balay
7827bd09bSSatish Balay snail-mail:
8827bd09bSSatish Balay Division of Applied Mathematics
9827bd09bSSatish Balay Brown University
10827bd09bSSatish Balay Providence, RI 02912
11827bd09bSSatish Balay
12827bd09bSSatish Balay Last Modification:
13827bd09bSSatish Balay 6.21.97
14827bd09bSSatish Balay
15827bd09bSSatish Balay File Description:
16827bd09bSSatish Balay -----------------
17827bd09bSSatish Balay
18*21789920SBarry Smith */
19827bd09bSSatish Balay
20c6db04a5SJed Brown #include <../src/ksp/pc/impls/tfs/tfs.h>
2139945688SSatish Balay
22827bd09bSSatish Balay /* default length of number of items via tree - doubles if exceeded */
23a8f51744SPierre Jolivet #define TREE_BUF_SZ 2048
24827bd09bSSatish Balay #define GS_VEC_SZ 1
25827bd09bSSatish Balay
26*21789920SBarry Smith /*
27827bd09bSSatish Balay Type: struct gather_scatter_id
28*21789920SBarry Smith */
29827bd09bSSatish Balay typedef struct gather_scatter_id {
3052f87cdaSBarry Smith PetscInt id;
3152f87cdaSBarry Smith PetscInt nel_min;
3252f87cdaSBarry Smith PetscInt nel_max;
3352f87cdaSBarry Smith PetscInt nel_sum;
3452f87cdaSBarry Smith PetscInt negl;
3552f87cdaSBarry Smith PetscInt gl_max;
3652f87cdaSBarry Smith PetscInt gl_min;
3752f87cdaSBarry Smith PetscInt repeats;
3852f87cdaSBarry Smith PetscInt ordered;
3952f87cdaSBarry Smith PetscInt positive;
40a501084fSBarry Smith PetscScalar *vals;
41827bd09bSSatish Balay
42827bd09bSSatish Balay /* bit mask info */
4352f87cdaSBarry Smith PetscInt *my_proc_mask;
4452f87cdaSBarry Smith PetscInt mask_sz;
4552f87cdaSBarry Smith PetscInt *ngh_buf;
4652f87cdaSBarry Smith PetscInt ngh_buf_sz;
4752f87cdaSBarry Smith PetscInt *nghs;
4852f87cdaSBarry Smith PetscInt num_nghs;
4952f87cdaSBarry Smith PetscInt max_nghs;
5052f87cdaSBarry Smith PetscInt *pw_nghs;
5152f87cdaSBarry Smith PetscInt num_pw_nghs;
5252f87cdaSBarry Smith PetscInt *tree_nghs;
5352f87cdaSBarry Smith PetscInt num_tree_nghs;
54827bd09bSSatish Balay
5552f87cdaSBarry Smith PetscInt num_loads;
56827bd09bSSatish Balay
57827bd09bSSatish Balay /* repeats == true -> local info */
58baca6076SPierre Jolivet PetscInt nel; /* number of unique elements */
5952f87cdaSBarry Smith PetscInt *elms; /* of size nel */
6052f87cdaSBarry Smith PetscInt nel_total;
6152f87cdaSBarry Smith PetscInt *local_elms; /* of size nel_total */
6252f87cdaSBarry Smith PetscInt *companion; /* of size nel_total */
63827bd09bSSatish Balay
64827bd09bSSatish Balay /* local info */
6552f87cdaSBarry Smith PetscInt num_local_total;
6652f87cdaSBarry Smith PetscInt local_strength;
6752f87cdaSBarry Smith PetscInt num_local;
6852f87cdaSBarry Smith PetscInt *num_local_reduce;
6952f87cdaSBarry Smith PetscInt **local_reduce;
7052f87cdaSBarry Smith PetscInt num_local_gop;
7152f87cdaSBarry Smith PetscInt *num_gop_local_reduce;
7252f87cdaSBarry Smith PetscInt **gop_local_reduce;
73827bd09bSSatish Balay
74827bd09bSSatish Balay /* pairwise info */
7552f87cdaSBarry Smith PetscInt level;
7652f87cdaSBarry Smith PetscInt num_pairs;
7752f87cdaSBarry Smith PetscInt max_pairs;
7852f87cdaSBarry Smith PetscInt loc_node_pairs;
7952f87cdaSBarry Smith PetscInt max_node_pairs;
8052f87cdaSBarry Smith PetscInt min_node_pairs;
8152f87cdaSBarry Smith PetscInt avg_node_pairs;
8252f87cdaSBarry Smith PetscInt *pair_list;
8352f87cdaSBarry Smith PetscInt *msg_sizes;
8452f87cdaSBarry Smith PetscInt **node_list;
8552f87cdaSBarry Smith PetscInt len_pw_list;
8652f87cdaSBarry Smith PetscInt *pw_elm_list;
87a501084fSBarry Smith PetscScalar *pw_vals;
88827bd09bSSatish Balay
89827bd09bSSatish Balay MPI_Request *msg_ids_in;
90827bd09bSSatish Balay MPI_Request *msg_ids_out;
91827bd09bSSatish Balay
92a501084fSBarry Smith PetscScalar *out;
93a501084fSBarry Smith PetscScalar *in;
9452f87cdaSBarry Smith PetscInt msg_total;
95827bd09bSSatish Balay
96827bd09bSSatish Balay /* tree - crystal accumulator info */
9752f87cdaSBarry Smith PetscInt max_left_over;
9852f87cdaSBarry Smith PetscInt *pre;
9952f87cdaSBarry Smith PetscInt *in_num;
10052f87cdaSBarry Smith PetscInt *out_num;
10152f87cdaSBarry Smith PetscInt **in_list;
10252f87cdaSBarry Smith PetscInt **out_list;
103827bd09bSSatish Balay
104827bd09bSSatish Balay /* new tree work*/
10552f87cdaSBarry Smith PetscInt tree_nel;
10652f87cdaSBarry Smith PetscInt *tree_elms;
107a501084fSBarry Smith PetscScalar *tree_buf;
108a501084fSBarry Smith PetscScalar *tree_work;
109827bd09bSSatish Balay
11052f87cdaSBarry Smith PetscInt tree_map_sz;
11152f87cdaSBarry Smith PetscInt *tree_map_in;
11252f87cdaSBarry Smith PetscInt *tree_map_out;
113827bd09bSSatish Balay
114827bd09bSSatish Balay /* current memory status */
11552f87cdaSBarry Smith PetscInt gl_bss_min;
11652f87cdaSBarry Smith PetscInt gl_perm_min;
117827bd09bSSatish Balay
118ca8e9878SJed Brown /* max segment size for PCTFS_gs_gop_vec() */
11952f87cdaSBarry Smith PetscInt vec_sz;
120827bd09bSSatish Balay
121827bd09bSSatish Balay /* hack to make paul happy */
122ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm;
123827bd09bSSatish Balay
124ca8e9878SJed Brown } PCTFS_gs_id;
125827bd09bSSatish Balay
126ca8e9878SJed Brown static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
127ca8e9878SJed Brown static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
128ca8e9878SJed Brown static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
129ca8e9878SJed Brown static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
130ca8e9878SJed Brown static PCTFS_gs_id *gsi_new(void);
131ca8e9878SJed Brown static PetscErrorCode set_tree(PCTFS_gs_id *gs);
132827bd09bSSatish Balay
133827bd09bSSatish Balay /* same for all but vector flavor */
134ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
135827bd09bSSatish Balay /* vector flavor */
136ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
137827bd09bSSatish Balay
138ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
139ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
140ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
141ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
142ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
143827bd09bSSatish Balay
144ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
145ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
146827bd09bSSatish Balay
147ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
148ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
149ca8e9878SJed Brown static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
150827bd09bSSatish Balay
151827bd09bSSatish Balay /* global vars */
152827bd09bSSatish Balay /* from comm.c module */
153827bd09bSSatish Balay
15452f87cdaSBarry Smith static PetscInt num_gs_ids = 0;
155827bd09bSSatish Balay
156827bd09bSSatish Balay /* should make this dynamic ... later */
15752f87cdaSBarry Smith static PetscInt msg_buf = MAX_MSG_BUF;
15852f87cdaSBarry Smith static PetscInt vec_sz = GS_VEC_SZ;
15952f87cdaSBarry Smith static PetscInt *tree_buf = NULL;
16052f87cdaSBarry Smith static PetscInt tree_buf_sz = 0;
16152f87cdaSBarry Smith static PetscInt ntree = 0;
162827bd09bSSatish Balay
163f1ed62a8SBarry Smith /***************************************************************************/
PCTFS_gs_init_vec_sz(PetscInt size)164d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
165d71ae5a4SJacob Faibussowitsch {
1663fdc5746SBarry Smith PetscFunctionBegin;
167827bd09bSSatish Balay vec_sz = size;
1683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
169827bd09bSSatish Balay }
170827bd09bSSatish Balay
171f1ed62a8SBarry Smith /******************************************************************************/
PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)172d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
173d71ae5a4SJacob Faibussowitsch {
1743fdc5746SBarry Smith PetscFunctionBegin;
175827bd09bSSatish Balay msg_buf = buf_size;
1763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
177827bd09bSSatish Balay }
178827bd09bSSatish Balay
179f1ed62a8SBarry Smith /******************************************************************************/
PCTFS_gs_init(PetscInt * elms,PetscInt nel,PetscInt level)180d71ae5a4SJacob Faibussowitsch PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
181d71ae5a4SJacob Faibussowitsch {
182ca8e9878SJed Brown PCTFS_gs_id *gs;
183ca8e9878SJed Brown MPI_Group PCTFS_gs_group;
184ca8e9878SJed Brown MPI_Comm PCTFS_gs_comm;
185827bd09bSSatish Balay
186827bd09bSSatish Balay /* ensure that communication package has been initialized */
1873ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init());
188827bd09bSSatish Balay
189827bd09bSSatish Balay /* determines if we have enough dynamic/semi-static memory */
190827bd09bSSatish Balay /* checks input, allocs and sets gd_id template */
191827bd09bSSatish Balay gs = gsi_check_args(elms, nel, level);
192827bd09bSSatish Balay
193827bd09bSSatish Balay /* only bit mask version up and working for the moment */
194827bd09bSSatish Balay /* LATER :: get int list version working for sparse pblms */
1959566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs));
196827bd09bSSatish Balay
1973ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
1983ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS);
1993ba16761SJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
2002fa5cd67SKarl Rupp
201ca8e9878SJed Brown gs->PCTFS_gs_comm = PCTFS_gs_comm;
202827bd09bSSatish Balay
2034ad8454bSPierre Jolivet return gs;
204827bd09bSSatish Balay }
205827bd09bSSatish Balay
206f1ed62a8SBarry Smith /******************************************************************************/
gsi_new(void)207d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_new(void)
208d71ae5a4SJacob Faibussowitsch {
209ca8e9878SJed Brown PCTFS_gs_id *gs;
210ca8e9878SJed Brown gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id));
2119566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id)));
2124ad8454bSPierre Jolivet return gs;
213827bd09bSSatish Balay }
214827bd09bSSatish Balay
215f1ed62a8SBarry Smith /******************************************************************************/
gsi_check_args(PetscInt * in_elms,PetscInt nel,PetscInt level)216d71ae5a4SJacob Faibussowitsch static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
217d71ae5a4SJacob Faibussowitsch {
21852f87cdaSBarry Smith PetscInt i, j, k, t2;
21952f87cdaSBarry Smith PetscInt *companion, *elms, *unique, *iptr;
22052f87cdaSBarry Smith PetscInt num_local = 0, *num_to_reduce, **local_reduce;
22152f87cdaSBarry Smith PetscInt oprs[] = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND};
222dd39110bSPierre Jolivet PetscInt vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
223dd39110bSPierre Jolivet PetscInt work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
224ca8e9878SJed Brown PCTFS_gs_id *gs;
225827bd09bSSatish Balay
226c1235816SBarry Smith if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n");
227c1235816SBarry Smith if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n");
228827bd09bSSatish Balay
2299566063dSJacob Faibussowitsch if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n"));
230827bd09bSSatish Balay
231827bd09bSSatish Balay /* get space for gs template */
232827bd09bSSatish Balay gs = gsi_new();
233827bd09bSSatish Balay gs->id = ++num_gs_ids;
234827bd09bSSatish Balay
235827bd09bSSatish Balay /* hmt 6.4.99 */
236827bd09bSSatish Balay /* caller can set global ids that don't participate to 0 */
237ca8e9878SJed Brown /* PCTFS_gs_init ignores all zeros in elm list */
238827bd09bSSatish Balay /* negative global ids are still invalid */
2392fa5cd67SKarl Rupp for (i = j = 0; i < nel; i++) {
2402fa5cd67SKarl Rupp if (in_elms[i] != 0) j++;
2412fa5cd67SKarl Rupp }
242827bd09bSSatish Balay
2439371c9d4SSatish Balay k = nel;
2449371c9d4SSatish Balay nel = j;
245827bd09bSSatish Balay
246827bd09bSSatish Balay /* copy over in_elms list and create inverse map */
24752f87cdaSBarry Smith elms = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt));
24852f87cdaSBarry Smith companion = (PetscInt *)malloc(nel * sizeof(PetscInt));
2491d7d0905SBarry Smith
250db4deed7SKarl Rupp for (i = j = 0; i < k; i++) {
2519371c9d4SSatish Balay if (in_elms[i] != 0) {
2529371c9d4SSatish Balay elms[j] = in_elms[i];
2539371c9d4SSatish Balay companion[j++] = i;
2549371c9d4SSatish Balay }
255827bd09bSSatish Balay }
256827bd09bSSatish Balay
257c1235816SBarry Smith if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n");
258827bd09bSSatish Balay
259827bd09bSSatish Balay /* pre-pass ... check to see if sorted */
260827bd09bSSatish Balay elms[nel] = INT_MAX;
261827bd09bSSatish Balay iptr = elms;
262827bd09bSSatish Balay unique = elms + 1;
263827bd09bSSatish Balay j = 0;
264db4deed7SKarl Rupp while (*iptr != INT_MAX) {
2659371c9d4SSatish Balay if (*iptr++ > *unique++) {
2669371c9d4SSatish Balay j = 1;
2679371c9d4SSatish Balay break;
2689371c9d4SSatish Balay }
269827bd09bSSatish Balay }
270827bd09bSSatish Balay
271827bd09bSSatish Balay /* set up inverse map */
272db4deed7SKarl Rupp if (j) {
2739566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n"));
2749566063dSJacob Faibussowitsch PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER));
2759566063dSJacob Faibussowitsch } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n"));
276827bd09bSSatish Balay elms[nel] = INT_MIN;
277827bd09bSSatish Balay
278827bd09bSSatish Balay /* first pass */
279827bd09bSSatish Balay /* determine number of unique elements, check pd */
280db4deed7SKarl Rupp for (i = k = 0; i < nel; i += j) {
281827bd09bSSatish Balay t2 = elms[i];
282827bd09bSSatish Balay j = ++i;
283827bd09bSSatish Balay
284827bd09bSSatish Balay /* clump 'em for now */
2852fa5cd67SKarl Rupp while (elms[j] == t2) j++;
286827bd09bSSatish Balay
287827bd09bSSatish Balay /* how many together and num local */
2889371c9d4SSatish Balay if (j -= i) {
2899371c9d4SSatish Balay num_local++;
2909371c9d4SSatish Balay k += j;
2919371c9d4SSatish Balay }
292827bd09bSSatish Balay }
293827bd09bSSatish Balay
294827bd09bSSatish Balay /* how many unique elements? */
295827bd09bSSatish Balay gs->repeats = k;
296827bd09bSSatish Balay gs->nel = nel - k;
297827bd09bSSatish Balay
298827bd09bSSatish Balay /* number of repeats? */
299827bd09bSSatish Balay gs->num_local = num_local;
300827bd09bSSatish Balay num_local += 2;
30152f87cdaSBarry Smith gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *));
30252f87cdaSBarry Smith gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt));
303827bd09bSSatish Balay
30452f87cdaSBarry Smith unique = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt));
305827bd09bSSatish Balay gs->elms = unique;
306827bd09bSSatish Balay gs->nel_total = nel;
307827bd09bSSatish Balay gs->local_elms = elms;
308827bd09bSSatish Balay gs->companion = companion;
309827bd09bSSatish Balay
310827bd09bSSatish Balay /* compess map as well as keep track of local ops */
311db4deed7SKarl Rupp for (num_local = i = j = 0; i < gs->nel; i++) {
312827bd09bSSatish Balay k = j;
313827bd09bSSatish Balay t2 = unique[i] = elms[j];
314827bd09bSSatish Balay companion[i] = companion[j];
315827bd09bSSatish Balay
3162fa5cd67SKarl Rupp while (elms[j] == t2) j++;
317827bd09bSSatish Balay
318db4deed7SKarl Rupp if ((t2 = (j - k)) > 1) {
319827bd09bSSatish Balay /* number together */
320827bd09bSSatish Balay num_to_reduce[num_local] = t2++;
3212fa5cd67SKarl Rupp
32252f87cdaSBarry Smith iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt));
323827bd09bSSatish Balay
324827bd09bSSatish Balay /* to use binary searching don't remap until we check intersection */
325827bd09bSSatish Balay *iptr++ = i;
326827bd09bSSatish Balay
327827bd09bSSatish Balay /* note that we're skipping the first one */
3282fa5cd67SKarl Rupp while (++k < j) *(iptr++) = companion[k];
329827bd09bSSatish Balay *iptr = -1;
330827bd09bSSatish Balay }
331827bd09bSSatish Balay }
332827bd09bSSatish Balay
333827bd09bSSatish Balay /* sentinel for ngh_buf */
334827bd09bSSatish Balay unique[gs->nel] = INT_MAX;
335827bd09bSSatish Balay
336827bd09bSSatish Balay /* for two partition sort hack */
337827bd09bSSatish Balay num_to_reduce[num_local] = 0;
338827bd09bSSatish Balay local_reduce[num_local] = NULL;
339827bd09bSSatish Balay num_to_reduce[++num_local] = 0;
340827bd09bSSatish Balay local_reduce[num_local] = NULL;
341827bd09bSSatish Balay
342827bd09bSSatish Balay /* load 'em up */
343827bd09bSSatish Balay /* note one extra to hold NON_UNIFORM flag!!! */
344827bd09bSSatish Balay vals[2] = vals[1] = vals[0] = nel;
345db4deed7SKarl Rupp if (gs->nel > 0) {
3461d7d0905SBarry Smith vals[3] = unique[0];
3471d7d0905SBarry Smith vals[4] = unique[gs->nel - 1];
348db4deed7SKarl Rupp } else {
3491d7d0905SBarry Smith vals[3] = INT_MAX;
3501d7d0905SBarry Smith vals[4] = INT_MIN;
351827bd09bSSatish Balay }
352827bd09bSSatish Balay vals[5] = level;
353827bd09bSSatish Balay vals[6] = num_gs_ids;
354827bd09bSSatish Balay
355827bd09bSSatish Balay /* GLOBAL: send 'em out */
356dd39110bSPierre Jolivet PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs));
357827bd09bSSatish Balay
358827bd09bSSatish Balay /* must be semi-pos def - only pairwise depends on this */
359827bd09bSSatish Balay /* LATER - remove this restriction */
360c1235816SBarry Smith if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n");
361c1235816SBarry Smith if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n");
362827bd09bSSatish Balay
363827bd09bSSatish Balay gs->nel_min = vals[0];
364827bd09bSSatish Balay gs->nel_max = vals[1];
365827bd09bSSatish Balay gs->nel_sum = vals[2];
366827bd09bSSatish Balay gs->gl_min = vals[3];
367827bd09bSSatish Balay gs->gl_max = vals[4];
368827bd09bSSatish Balay gs->negl = vals[4] - vals[3] + 1;
369827bd09bSSatish Balay
37063a3b9bcSJacob Faibussowitsch if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl);
371827bd09bSSatish Balay
372827bd09bSSatish Balay /* LATER :: add level == -1 -> program selects level */
3732fa5cd67SKarl Rupp if (vals[5] < 0) vals[5] = 0;
3742fa5cd67SKarl Rupp else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes;
375827bd09bSSatish Balay gs->level = vals[5];
376827bd09bSSatish Balay
3774ad8454bSPierre Jolivet return gs;
378827bd09bSSatish Balay }
379827bd09bSSatish Balay
380f1ed62a8SBarry Smith /******************************************************************************/
gsi_via_bit_mask(PCTFS_gs_id * gs)381d71ae5a4SJacob Faibussowitsch static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
382d71ae5a4SJacob Faibussowitsch {
38352f87cdaSBarry Smith PetscInt i, nel, *elms;
38452f87cdaSBarry Smith PetscInt t1;
38552f87cdaSBarry Smith PetscInt **reduce;
38652f87cdaSBarry Smith PetscInt *map;
387827bd09bSSatish Balay
388f1ed62a8SBarry Smith PetscFunctionBegin;
389ca8e9878SJed Brown /* totally local removes ... PCTFS_ct_bits == 0 */
3903ba16761SJacob Faibussowitsch PetscCall(get_ngh_buf(gs));
391827bd09bSSatish Balay
3923ba16761SJacob Faibussowitsch if (gs->level) PetscCall(set_pairwise(gs));
3933ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(set_tree(gs));
394827bd09bSSatish Balay
395827bd09bSSatish Balay /* intersection local and pairwise/tree? */
396827bd09bSSatish Balay gs->num_local_total = gs->num_local;
397827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce;
398827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce;
399827bd09bSSatish Balay
400827bd09bSSatish Balay map = gs->companion;
401827bd09bSSatish Balay
402827bd09bSSatish Balay /* is there any local compression */
403d890fc11SSatish Balay if (!gs->num_local) {
404827bd09bSSatish Balay gs->local_strength = NONE;
405827bd09bSSatish Balay gs->num_local_gop = 0;
406d890fc11SSatish Balay } else {
407827bd09bSSatish Balay /* ok find intersection */
408827bd09bSSatish Balay map = gs->companion;
409827bd09bSSatish Balay reduce = gs->local_reduce;
4104a2f8832SBarry Smith for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) {
4114a2f8832SBarry Smith if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) {
412827bd09bSSatish Balay t1++;
41308401ef6SPierre Jolivet PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?");
414827bd09bSSatish Balay gs->num_local_reduce[i] *= -1;
415827bd09bSSatish Balay }
416827bd09bSSatish Balay **reduce = map[**reduce];
417827bd09bSSatish Balay }
418827bd09bSSatish Balay
419827bd09bSSatish Balay /* intersection is empty */
420db4deed7SKarl Rupp if (!t1) {
421827bd09bSSatish Balay gs->local_strength = FULL;
422827bd09bSSatish Balay gs->num_local_gop = 0;
423db4deed7SKarl Rupp } else { /* intersection not empty */
424827bd09bSSatish Balay gs->local_strength = PARTIAL;
4252fa5cd67SKarl Rupp
4269566063dSJacob Faibussowitsch PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR));
427827bd09bSSatish Balay
428827bd09bSSatish Balay gs->num_local_gop = t1;
429827bd09bSSatish Balay gs->num_local_total = gs->num_local;
430827bd09bSSatish Balay gs->num_local -= t1;
431827bd09bSSatish Balay gs->gop_local_reduce = gs->local_reduce;
432827bd09bSSatish Balay gs->num_gop_local_reduce = gs->num_local_reduce;
433827bd09bSSatish Balay
4342fa5cd67SKarl Rupp for (i = 0; i < t1; i++) {
43508401ef6SPierre Jolivet PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?");
436827bd09bSSatish Balay gs->num_gop_local_reduce[i] *= -1;
437827bd09bSSatish Balay gs->local_reduce++;
438827bd09bSSatish Balay gs->num_local_reduce++;
439827bd09bSSatish Balay }
440827bd09bSSatish Balay gs->local_reduce++;
441827bd09bSSatish Balay gs->num_local_reduce++;
442827bd09bSSatish Balay }
443827bd09bSSatish Balay }
444827bd09bSSatish Balay
445827bd09bSSatish Balay elms = gs->pw_elm_list;
446827bd09bSSatish Balay nel = gs->len_pw_list;
4472fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
448827bd09bSSatish Balay
449827bd09bSSatish Balay elms = gs->tree_map_in;
450827bd09bSSatish Balay nel = gs->tree_map_sz;
4512fa5cd67SKarl Rupp for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
452827bd09bSSatish Balay
453827bd09bSSatish Balay /* clean up */
454a501084fSBarry Smith free((void *)gs->local_elms);
455a501084fSBarry Smith free((void *)gs->companion);
456a501084fSBarry Smith free((void *)gs->elms);
457a501084fSBarry Smith free((void *)gs->ngh_buf);
458827bd09bSSatish Balay gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
4593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
460827bd09bSSatish Balay }
461827bd09bSSatish Balay
462f1ed62a8SBarry Smith /******************************************************************************/
place_in_tree(PetscInt elm)463d71ae5a4SJacob Faibussowitsch static PetscErrorCode place_in_tree(PetscInt elm)
464d71ae5a4SJacob Faibussowitsch {
46552f87cdaSBarry Smith PetscInt *tp, n;
466827bd09bSSatish Balay
4673fdc5746SBarry Smith PetscFunctionBegin;
4682fa5cd67SKarl Rupp if (ntree == tree_buf_sz) {
469db4deed7SKarl Rupp if (tree_buf_sz) {
470827bd09bSSatish Balay tp = tree_buf;
471827bd09bSSatish Balay n = tree_buf_sz;
472827bd09bSSatish Balay tree_buf_sz <<= 1;
47352f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
474ca8e9878SJed Brown PCTFS_ivec_copy(tree_buf, tp, n);
475a501084fSBarry Smith free(tp);
476db4deed7SKarl Rupp } else {
477827bd09bSSatish Balay tree_buf_sz = TREE_BUF_SZ;
47852f87cdaSBarry Smith tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
479827bd09bSSatish Balay }
480827bd09bSSatish Balay }
481827bd09bSSatish Balay
482827bd09bSSatish Balay tree_buf[ntree++] = elm;
4833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
484827bd09bSSatish Balay }
485827bd09bSSatish Balay
486f1ed62a8SBarry Smith /******************************************************************************/
get_ngh_buf(PCTFS_gs_id * gs)487d71ae5a4SJacob Faibussowitsch static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
488d71ae5a4SJacob Faibussowitsch {
48952f87cdaSBarry Smith PetscInt i, j, npw = 0, ntree_map = 0;
49052f87cdaSBarry Smith PetscInt p_mask_size, ngh_buf_size, buf_size;
49152f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
49252f87cdaSBarry Smith PetscInt *ngh_buf, *buf1, *buf2;
49352f87cdaSBarry Smith PetscInt offset, per_load, num_loads, or_ct, start, end;
49452f87cdaSBarry Smith PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
49552f87cdaSBarry Smith PetscInt oper = GL_B_OR;
49652f87cdaSBarry Smith PetscInt *ptr3, *t_mask, level, ct1, ct2;
497827bd09bSSatish Balay
4983fdc5746SBarry Smith PetscFunctionBegin;
499827bd09bSSatish Balay /* to make life easier */
500827bd09bSSatish Balay nel = gs->nel;
501827bd09bSSatish Balay elms = gs->elms;
502827bd09bSSatish Balay level = gs->level;
503827bd09bSSatish Balay
504b1c944f5SJed Brown /* det #bytes needed for processor bit masks and init w/mask cor. to PCTFS_my_id */
505ca8e9878SJed Brown p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes));
5069566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
507827bd09bSSatish Balay
508827bd09bSSatish Balay /* allocate space for masks and info bufs */
50952f87cdaSBarry Smith gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size);
51052f87cdaSBarry Smith gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size);
511827bd09bSSatish Balay gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel;
51252f87cdaSBarry Smith t_mask = (PetscInt *)malloc(p_mask_size);
51352f87cdaSBarry Smith gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size);
514827bd09bSSatish Balay
515827bd09bSSatish Balay /* comm buffer size ... memory usage bounded by ~2*msg_buf */
516827bd09bSSatish Balay /* had thought I could exploit rendezvous threshold */
517827bd09bSSatish Balay
518827bd09bSSatish Balay /* default is one pass */
519827bd09bSSatish Balay per_load = negl = gs->negl;
520827bd09bSSatish Balay gs->num_loads = num_loads = 1;
521827bd09bSSatish Balay i = p_mask_size * negl;
522827bd09bSSatish Balay
523827bd09bSSatish Balay /* possible overflow on buffer size */
524827bd09bSSatish Balay /* overflow hack */
5252fa5cd67SKarl Rupp if (i < 0) i = INT_MAX;
526827bd09bSSatish Balay
52739945688SSatish Balay buf_size = PetscMin(msg_buf, i);
528827bd09bSSatish Balay
529827bd09bSSatish Balay /* can we do it? */
53063a3b9bcSJacob Faibussowitsch PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size);
531827bd09bSSatish Balay
532b1c944f5SJed Brown /* get PCTFS_giop buf space ... make *only* one malloc */
53352f87cdaSBarry Smith buf1 = (PetscInt *)malloc(buf_size << 1);
534827bd09bSSatish Balay
535827bd09bSSatish Balay /* more than one gior exchange needed? */
536db4deed7SKarl Rupp if (buf_size != i) {
537827bd09bSSatish Balay per_load = buf_size / p_mask_size;
538827bd09bSSatish Balay buf_size = per_load * p_mask_size;
539827bd09bSSatish Balay gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0);
540827bd09bSSatish Balay }
541827bd09bSSatish Balay
5427de69702SBarry Smith /* convert buf sizes from #bytes to #ints - 32-bit only! */
5439371c9d4SSatish Balay p_mask_size /= sizeof(PetscInt);
5449371c9d4SSatish Balay ngh_buf_size /= sizeof(PetscInt);
5459371c9d4SSatish Balay buf_size /= sizeof(PetscInt);
546827bd09bSSatish Balay
547b1c944f5SJed Brown /* find PCTFS_giop work space */
548827bd09bSSatish Balay buf2 = buf1 + buf_size;
549827bd09bSSatish Balay
550827bd09bSSatish Balay /* hold #ints needed for processor masks */
551827bd09bSSatish Balay gs->mask_sz = p_mask_size;
552827bd09bSSatish Balay
553827bd09bSSatish Balay /* init buffers */
5549566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size));
5559566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size));
5569566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size));
557827bd09bSSatish Balay
558827bd09bSSatish Balay /* HACK reset tree info */
559827bd09bSSatish Balay tree_buf = NULL;
560827bd09bSSatish Balay tree_buf_sz = ntree = 0;
561827bd09bSSatish Balay
562827bd09bSSatish Balay /* ok do it */
563db4deed7SKarl Rupp for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) {
564827bd09bSSatish Balay /* identity for bitwise or is 000...000 */
5653ba16761SJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(buf1, buf_size));
566827bd09bSSatish Balay
567827bd09bSSatish Balay /* load msg buffer */
568db4deed7SKarl Rupp for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) {
569827bd09bSSatish Balay offset = (offset - start) * p_mask_size;
570ca8e9878SJed Brown PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size);
571827bd09bSSatish Balay }
572827bd09bSSatish Balay
573827bd09bSSatish Balay /* GLOBAL: pass buffer */
5749566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper));
575827bd09bSSatish Balay
576827bd09bSSatish Balay /* unload buffer into ngh_buf */
577827bd09bSSatish Balay ptr2 = (elms + i_start);
578db4deed7SKarl Rupp for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) {
579827bd09bSSatish Balay /* I own it ... may have to pairwise it */
580db4deed7SKarl Rupp if (j == *ptr2) {
581827bd09bSSatish Balay /* do i share it w/anyone? */
582ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
583827bd09bSSatish Balay /* guess not */
5849371c9d4SSatish Balay if (ct1 < 2) {
5859371c9d4SSatish Balay ptr2++;
5869371c9d4SSatish Balay ptr1 += p_mask_size;
5879371c9d4SSatish Balay continue;
5889371c9d4SSatish Balay }
589827bd09bSSatish Balay
590827bd09bSSatish Balay /* i do ... so keep info and turn off my bit */
591ca8e9878SJed Brown PCTFS_ivec_copy(ptr1, ptr3, p_mask_size);
5929566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size));
5939566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size));
594827bd09bSSatish Balay
595827bd09bSSatish Balay /* is it to be done pairwise? */
596db4deed7SKarl Rupp if (--ct1 <= level) {
597827bd09bSSatish Balay npw++;
598827bd09bSSatish Balay
599827bd09bSSatish Balay /* turn on high bit to indicate pw need to process */
600827bd09bSSatish Balay *ptr2++ |= TOP_BIT;
6019566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size));
602827bd09bSSatish Balay ptr1 += p_mask_size;
603827bd09bSSatish Balay continue;
604827bd09bSSatish Balay }
605827bd09bSSatish Balay
606827bd09bSSatish Balay /* get set for next and note that I have a tree contribution */
607827bd09bSSatish Balay /* could save exact elm index for tree here -> save a search */
6089371c9d4SSatish Balay ptr2++;
6099371c9d4SSatish Balay ptr1 += p_mask_size;
6109371c9d4SSatish Balay ntree_map++;
611db4deed7SKarl Rupp } else { /* i don't but still might be involved in tree */
612827bd09bSSatish Balay
613827bd09bSSatish Balay /* shared by how many? */
614ca8e9878SJed Brown ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
615827bd09bSSatish Balay
616827bd09bSSatish Balay /* none! */
617f1ed62a8SBarry Smith if (ct1 < 2) continue;
618827bd09bSSatish Balay
619827bd09bSSatish Balay /* is it going to be done pairwise? but not by me of course!*/
620f1ed62a8SBarry Smith if (--ct1 <= level) continue;
621827bd09bSSatish Balay }
622827bd09bSSatish Balay /* LATER we're going to have to process it NOW */
623827bd09bSSatish Balay /* nope ... tree it */
6249566063dSJacob Faibussowitsch PetscCall(place_in_tree(j));
625827bd09bSSatish Balay }
626827bd09bSSatish Balay }
627827bd09bSSatish Balay
628a501084fSBarry Smith free((void *)t_mask);
629a501084fSBarry Smith free((void *)buf1);
630827bd09bSSatish Balay
631827bd09bSSatish Balay gs->len_pw_list = npw;
632ca8e9878SJed Brown gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
633827bd09bSSatish Balay
634827bd09bSSatish Balay /* expand from bit mask list to int list and save ngh list */
63552f87cdaSBarry Smith gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt));
6363ba16761SJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs));
637827bd09bSSatish Balay
638ca8e9878SJed Brown gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt));
639827bd09bSSatish Balay
640827bd09bSSatish Balay oper = GL_MAX;
641827bd09bSSatish Balay ct1 = gs->num_nghs;
6429566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper));
643827bd09bSSatish Balay gs->max_nghs = ct1;
644827bd09bSSatish Balay
645827bd09bSSatish Balay gs->tree_map_sz = ntree_map;
646827bd09bSSatish Balay gs->max_left_over = ntree;
647827bd09bSSatish Balay
648a501084fSBarry Smith free((void *)p_mask);
649a501084fSBarry Smith free((void *)sh_proc_mask);
6503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
651827bd09bSSatish Balay }
652827bd09bSSatish Balay
653f1ed62a8SBarry Smith /******************************************************************************/
set_pairwise(PCTFS_gs_id * gs)654d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
655d71ae5a4SJacob Faibussowitsch {
65652f87cdaSBarry Smith PetscInt i, j;
65752f87cdaSBarry Smith PetscInt p_mask_size;
65852f87cdaSBarry Smith PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
65952f87cdaSBarry Smith PetscInt *ngh_buf, *buf2;
66052f87cdaSBarry Smith PetscInt offset;
66152f87cdaSBarry Smith PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
66252f87cdaSBarry Smith PetscInt *pairwise_elm_list, len_pair_list = 0;
66352f87cdaSBarry Smith PetscInt *iptr, t1, i_start, nel, *elms;
66452f87cdaSBarry Smith PetscInt ct;
665827bd09bSSatish Balay
6663fdc5746SBarry Smith PetscFunctionBegin;
667827bd09bSSatish Balay /* to make life easier */
668827bd09bSSatish Balay nel = gs->nel;
669827bd09bSSatish Balay elms = gs->elms;
670827bd09bSSatish Balay ngh_buf = gs->ngh_buf;
671827bd09bSSatish Balay sh_proc_mask = gs->pw_nghs;
672827bd09bSSatish Balay
673827bd09bSSatish Balay /* need a few temp masks */
674ca8e9878SJed Brown p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes);
67552f87cdaSBarry Smith p_mask = (PetscInt *)malloc(p_mask_size);
67652f87cdaSBarry Smith tmp_proc_mask = (PetscInt *)malloc(p_mask_size);
677827bd09bSSatish Balay
678b1c944f5SJed Brown /* set mask to my PCTFS_my_id's bit mask */
6799566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
680827bd09bSSatish Balay
681a501084fSBarry Smith p_mask_size /= sizeof(PetscInt);
682827bd09bSSatish Balay
683827bd09bSSatish Balay len_pair_list = gs->len_pw_list;
68452f87cdaSBarry Smith gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt));
685827bd09bSSatish Balay
686827bd09bSSatish Balay /* how many processors (nghs) do we have to exchange with? */
687ca8e9878SJed Brown nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
688827bd09bSSatish Balay
689ca8e9878SJed Brown /* allocate space for PCTFS_gs_gop() info */
69052f87cdaSBarry Smith gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
69152f87cdaSBarry Smith gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
69252f87cdaSBarry Smith gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1));
693827bd09bSSatish Balay
694827bd09bSSatish Balay /* init msg_size list */
6959566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_zero(msg_size, nprs));
696827bd09bSSatish Balay
697827bd09bSSatish Balay /* expand from bit mask list to int list */
6989566063dSJacob Faibussowitsch PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list));
699827bd09bSSatish Balay
700827bd09bSSatish Balay /* keep list of elements being handled pairwise */
701db4deed7SKarl Rupp for (i = j = 0; i < nel; i++) {
7029371c9d4SSatish Balay if (elms[i] & TOP_BIT) {
7039371c9d4SSatish Balay elms[i] ^= TOP_BIT;
7049371c9d4SSatish Balay pairwise_elm_list[j++] = i;
7059371c9d4SSatish Balay }
706827bd09bSSatish Balay }
707827bd09bSSatish Balay pairwise_elm_list[j] = -1;
708827bd09bSSatish Balay
709a501084fSBarry Smith gs->msg_ids_out = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
710827bd09bSSatish Balay gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
711a501084fSBarry Smith gs->msg_ids_in = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
712827bd09bSSatish Balay gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
713a501084fSBarry Smith gs->pw_vals = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz);
714827bd09bSSatish Balay
715827bd09bSSatish Balay /* find who goes to each processor */
716db4deed7SKarl Rupp for (i_start = i = 0; i < nprs; i++) {
717827bd09bSSatish Balay /* processor i's mask */
7189566063dSJacob Faibussowitsch PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i]));
719827bd09bSSatish Balay
720827bd09bSSatish Balay /* det # going to processor i */
721db4deed7SKarl Rupp for (ct = j = 0; j < len_pair_list; j++) {
722827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7239566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7242fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++;
725827bd09bSSatish Balay }
726827bd09bSSatish Balay msg_size[i] = ct;
72739945688SSatish Balay i_start = PetscMax(i_start, ct);
728827bd09bSSatish Balay
729827bd09bSSatish Balay /*space to hold nodes in message to first neighbor */
73052f87cdaSBarry Smith msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1));
731827bd09bSSatish Balay
732db4deed7SKarl Rupp for (j = 0; j < len_pair_list; j++) {
733827bd09bSSatish Balay buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
7349566063dSJacob Faibussowitsch PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
7352fa5cd67SKarl Rupp if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j;
736827bd09bSSatish Balay }
737827bd09bSSatish Balay *iptr = -1;
738827bd09bSSatish Balay }
739827bd09bSSatish Balay msg_nodes[nprs] = NULL;
740827bd09bSSatish Balay
741827bd09bSSatish Balay j = gs->loc_node_pairs = i_start;
742827bd09bSSatish Balay t1 = GL_MAX;
7439566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
744827bd09bSSatish Balay gs->max_node_pairs = i_start;
745827bd09bSSatish Balay
746827bd09bSSatish Balay i_start = j;
747827bd09bSSatish Balay t1 = GL_MIN;
7489566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
749827bd09bSSatish Balay gs->min_node_pairs = i_start;
750827bd09bSSatish Balay
751827bd09bSSatish Balay i_start = j;
752827bd09bSSatish Balay t1 = GL_ADD;
7539566063dSJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
754b1c944f5SJed Brown gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1;
755827bd09bSSatish Balay
756827bd09bSSatish Balay i_start = nprs;
757827bd09bSSatish Balay t1 = GL_MAX;
7583ba16761SJacob Faibussowitsch PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
759827bd09bSSatish Balay gs->max_pairs = i_start;
760827bd09bSSatish Balay
761827bd09bSSatish Balay /* remap pairwise in tail of gsi_via_bit_mask() */
762ca8e9878SJed Brown gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs);
763a501084fSBarry Smith gs->out = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
764a501084fSBarry Smith gs->in = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
765827bd09bSSatish Balay
766827bd09bSSatish Balay /* reset malloc pool */
767a501084fSBarry Smith free((void *)p_mask);
768a501084fSBarry Smith free((void *)tmp_proc_mask);
7693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
770827bd09bSSatish Balay }
771827bd09bSSatish Balay
772f1ed62a8SBarry Smith /* to do pruned tree just save ngh buf copy for each one and decode here!
773827bd09bSSatish Balay ******************************************************************************/
set_tree(PCTFS_gs_id * gs)774d71ae5a4SJacob Faibussowitsch static PetscErrorCode set_tree(PCTFS_gs_id *gs)
775d71ae5a4SJacob Faibussowitsch {
77652f87cdaSBarry Smith PetscInt i, j, n, nel;
77752f87cdaSBarry Smith PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
778827bd09bSSatish Balay
7793fdc5746SBarry Smith PetscFunctionBegin;
780827bd09bSSatish Balay /* local work ptrs */
781827bd09bSSatish Balay elms = gs->elms;
782827bd09bSSatish Balay nel = gs->nel;
783827bd09bSSatish Balay
784827bd09bSSatish Balay /* how many via tree */
785827bd09bSSatish Balay gs->tree_nel = n = ntree;
786827bd09bSSatish Balay gs->tree_elms = tree_elms = iptr_in = tree_buf;
787a501084fSBarry Smith gs->tree_buf = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
788a501084fSBarry Smith gs->tree_work = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
789827bd09bSSatish Balay j = gs->tree_map_sz;
79052f87cdaSBarry Smith gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
79152f87cdaSBarry Smith gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
792827bd09bSSatish Balay
793827bd09bSSatish Balay /* search the longer of the two lists */
794827bd09bSSatish Balay /* note ... could save this info in get_ngh_buf and save searches */
795db4deed7SKarl Rupp if (n <= nel) {
796827bd09bSSatish Balay /* bijective fct w/remap - search elm list */
797db4deed7SKarl Rupp for (i = 0; i < n; i++) {
7989371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) {
7999371c9d4SSatish Balay *iptr_in++ = j;
8009371c9d4SSatish Balay *iptr_out++ = i;
8019371c9d4SSatish Balay }
802827bd09bSSatish Balay }
803db4deed7SKarl Rupp } else {
804db4deed7SKarl Rupp for (i = 0; i < nel; i++) {
8059371c9d4SSatish Balay if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) {
8069371c9d4SSatish Balay *iptr_in++ = i;
8079371c9d4SSatish Balay *iptr_out++ = j;
8089371c9d4SSatish Balay }
809827bd09bSSatish Balay }
810827bd09bSSatish Balay }
811827bd09bSSatish Balay
812827bd09bSSatish Balay /* sentinel */
813827bd09bSSatish Balay *iptr_in = *iptr_out = -1;
8143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
815827bd09bSSatish Balay }
816827bd09bSSatish Balay
817f1ed62a8SBarry Smith /******************************************************************************/
PCTFS_gs_gop_local_out(PCTFS_gs_id * gs,PetscScalar * vals)818d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals)
819d71ae5a4SJacob Faibussowitsch {
82052f87cdaSBarry Smith PetscInt *num, *map, **reduce;
821a501084fSBarry Smith PetscScalar tmp;
822827bd09bSSatish Balay
8233fdc5746SBarry Smith PetscFunctionBegin;
824827bd09bSSatish Balay num = gs->num_gop_local_reduce;
825827bd09bSSatish Balay reduce = gs->gop_local_reduce;
826db4deed7SKarl Rupp while ((map = *reduce++)) {
827827bd09bSSatish Balay /* wall */
828db4deed7SKarl Rupp if (*num == 2) {
829827bd09bSSatish Balay num++;
830827bd09bSSatish Balay vals[map[1]] = vals[map[0]];
831db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */
832827bd09bSSatish Balay num++;
833827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]];
834db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */
835827bd09bSSatish Balay num++;
836827bd09bSSatish Balay vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
837db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/
838827bd09bSSatish Balay num++;
839827bd09bSSatish Balay tmp = *(vals + *map++);
8402fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp;
841827bd09bSSatish Balay }
842827bd09bSSatish Balay }
8433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
844827bd09bSSatish Balay }
845827bd09bSSatish Balay
8467b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_local_plus(PCTFS_gs_id * gs,PetscScalar * vals)847d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals)
848d71ae5a4SJacob Faibussowitsch {
84952f87cdaSBarry Smith PetscInt *num, *map, **reduce;
850a501084fSBarry Smith PetscScalar tmp;
851827bd09bSSatish Balay
8523fdc5746SBarry Smith PetscFunctionBegin;
853827bd09bSSatish Balay num = gs->num_local_reduce;
854827bd09bSSatish Balay reduce = gs->local_reduce;
855db4deed7SKarl Rupp while ((map = *reduce)) {
856827bd09bSSatish Balay /* wall */
857db4deed7SKarl Rupp if (*num == 2) {
8589371c9d4SSatish Balay num++;
8599371c9d4SSatish Balay reduce++;
860827bd09bSSatish Balay vals[map[1]] = vals[map[0]] += vals[map[1]];
861db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */
8629371c9d4SSatish Balay num++;
8639371c9d4SSatish Balay reduce++;
864827bd09bSSatish Balay vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]);
865db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */
8669371c9d4SSatish Balay num++;
8679371c9d4SSatish Balay reduce++;
8682fa5cd67SKarl Rupp vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
869db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/
870827bd09bSSatish Balay num++;
871827bd09bSSatish Balay tmp = 0.0;
8722fa5cd67SKarl Rupp while (*map >= 0) tmp += *(vals + *map++);
873827bd09bSSatish Balay
874827bd09bSSatish Balay map = *reduce++;
8752fa5cd67SKarl Rupp while (*map >= 0) *(vals + *map++) = tmp;
876827bd09bSSatish Balay }
877827bd09bSSatish Balay }
8783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
879827bd09bSSatish Balay }
880827bd09bSSatish Balay
8817b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_local_in_plus(PCTFS_gs_id * gs,PetscScalar * vals)882d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals)
883d71ae5a4SJacob Faibussowitsch {
88452f87cdaSBarry Smith PetscInt *num, *map, **reduce;
885a501084fSBarry Smith PetscScalar *base;
886827bd09bSSatish Balay
8873fdc5746SBarry Smith PetscFunctionBegin;
888827bd09bSSatish Balay num = gs->num_gop_local_reduce;
889827bd09bSSatish Balay reduce = gs->gop_local_reduce;
890db4deed7SKarl Rupp while ((map = *reduce++)) {
891827bd09bSSatish Balay /* wall */
892db4deed7SKarl Rupp if (*num == 2) {
893827bd09bSSatish Balay num++;
894827bd09bSSatish Balay vals[map[0]] += vals[map[1]];
895db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */
896827bd09bSSatish Balay num++;
897827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]]);
898db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */
899827bd09bSSatish Balay num++;
900827bd09bSSatish Balay vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
901db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/
902827bd09bSSatish Balay num++;
903827bd09bSSatish Balay base = vals + *map++;
9042fa5cd67SKarl Rupp while (*map >= 0) *base += *(vals + *map++);
905827bd09bSSatish Balay }
906827bd09bSSatish Balay }
9073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
908827bd09bSSatish Balay }
909827bd09bSSatish Balay
9107b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_free(PCTFS_gs_id * gs)911d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
912d71ae5a4SJacob Faibussowitsch {
91352f87cdaSBarry Smith PetscInt i;
914827bd09bSSatish Balay
9153fdc5746SBarry Smith PetscFunctionBegin;
9169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm));
9172fa5cd67SKarl Rupp if (gs->nghs) free((void *)gs->nghs);
9182fa5cd67SKarl Rupp if (gs->pw_nghs) free((void *)gs->pw_nghs);
919827bd09bSSatish Balay
920827bd09bSSatish Balay /* tree */
9212fa5cd67SKarl Rupp if (gs->max_left_over) {
9222fa5cd67SKarl Rupp if (gs->tree_elms) free((void *)gs->tree_elms);
9232fa5cd67SKarl Rupp if (gs->tree_buf) free((void *)gs->tree_buf);
9242fa5cd67SKarl Rupp if (gs->tree_work) free((void *)gs->tree_work);
9252fa5cd67SKarl Rupp if (gs->tree_map_in) free((void *)gs->tree_map_in);
9262fa5cd67SKarl Rupp if (gs->tree_map_out) free((void *)gs->tree_map_out);
927827bd09bSSatish Balay }
928827bd09bSSatish Balay
929827bd09bSSatish Balay /* pairwise info */
9302fa5cd67SKarl Rupp if (gs->num_pairs) {
931827bd09bSSatish Balay /* should be NULL already */
9322fa5cd67SKarl Rupp if (gs->ngh_buf) free((void *)gs->ngh_buf);
9332fa5cd67SKarl Rupp if (gs->elms) free((void *)gs->elms);
9342fa5cd67SKarl Rupp if (gs->local_elms) free((void *)gs->local_elms);
9352fa5cd67SKarl Rupp if (gs->companion) free((void *)gs->companion);
936827bd09bSSatish Balay
937827bd09bSSatish Balay /* only set if pairwise */
9382fa5cd67SKarl Rupp if (gs->vals) free((void *)gs->vals);
9392fa5cd67SKarl Rupp if (gs->in) free((void *)gs->in);
9402fa5cd67SKarl Rupp if (gs->out) free((void *)gs->out);
9412fa5cd67SKarl Rupp if (gs->msg_ids_in) free((void *)gs->msg_ids_in);
9422fa5cd67SKarl Rupp if (gs->msg_ids_out) free((void *)gs->msg_ids_out);
9432fa5cd67SKarl Rupp if (gs->pw_vals) free((void *)gs->pw_vals);
9442fa5cd67SKarl Rupp if (gs->pw_elm_list) free((void *)gs->pw_elm_list);
945db4deed7SKarl Rupp if (gs->node_list) {
946db4deed7SKarl Rupp for (i = 0; i < gs->num_pairs; i++) {
947ad540459SPierre Jolivet if (gs->node_list[i]) free((void *)gs->node_list[i]);
948db4deed7SKarl Rupp }
949a501084fSBarry Smith free((void *)gs->node_list);
950827bd09bSSatish Balay }
9512fa5cd67SKarl Rupp if (gs->msg_sizes) free((void *)gs->msg_sizes);
9522fa5cd67SKarl Rupp if (gs->pair_list) free((void *)gs->pair_list);
953827bd09bSSatish Balay }
954827bd09bSSatish Balay
955827bd09bSSatish Balay /* local info */
956db4deed7SKarl Rupp if (gs->num_local_total >= 0) {
957db4deed7SKarl Rupp for (i = 0; i < gs->num_local_total + 1; i++) {
9582fa5cd67SKarl Rupp if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]);
959827bd09bSSatish Balay }
960827bd09bSSatish Balay }
961827bd09bSSatish Balay
962827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */
9632fa5cd67SKarl Rupp if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce);
9642fa5cd67SKarl Rupp if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce);
965827bd09bSSatish Balay
966a501084fSBarry Smith free((void *)gs);
9673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
968827bd09bSSatish Balay }
969827bd09bSSatish Balay
9707b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec(PCTFS_gs_id * gs,PetscScalar * vals,const char * op,PetscInt step)971d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step)
972d71ae5a4SJacob Faibussowitsch {
9733fdc5746SBarry Smith PetscFunctionBegin;
974827bd09bSSatish Balay switch (*op) {
975d71ae5a4SJacob Faibussowitsch case '+':
9763ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
977d71ae5a4SJacob Faibussowitsch break;
978827bd09bSSatish Balay default:
9799566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0]));
9809566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n"));
9813ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
982827bd09bSSatish Balay break;
983827bd09bSSatish Balay }
9843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
985827bd09bSSatish Balay }
986827bd09bSSatish Balay
9877b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec_plus(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt step)988d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
989d71ae5a4SJacob Faibussowitsch {
9903fdc5746SBarry Smith PetscFunctionBegin;
99128b400f6SJacob Faibussowitsch PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!");
992827bd09bSSatish Balay
993827bd09bSSatish Balay /* local only operations!!! */
9943ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step));
995827bd09bSSatish Balay
996827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */
9972fa5cd67SKarl Rupp if (gs->num_local_gop) {
9983ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step));
999827bd09bSSatish Balay
1000827bd09bSSatish Balay /* pairwise */
10013ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1002827bd09bSSatish Balay
1003827bd09bSSatish Balay /* tree */
10043ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1005827bd09bSSatish Balay
10063ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step));
1007db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */
1008827bd09bSSatish Balay /* pairwise */
10093ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1010827bd09bSSatish Balay
1011827bd09bSSatish Balay /* tree */
10123ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1013827bd09bSSatish Balay }
10143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1015827bd09bSSatish Balay }
1016827bd09bSSatish Balay
10177b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt step)1018d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1019d71ae5a4SJacob Faibussowitsch {
102052f87cdaSBarry Smith PetscInt *num, *map, **reduce;
1021a501084fSBarry Smith PetscScalar *base;
1022827bd09bSSatish Balay
10233fdc5746SBarry Smith PetscFunctionBegin;
1024827bd09bSSatish Balay num = gs->num_local_reduce;
1025827bd09bSSatish Balay reduce = gs->local_reduce;
1026db4deed7SKarl Rupp while ((map = *reduce)) {
1027827bd09bSSatish Balay base = vals + map[0] * step;
1028827bd09bSSatish Balay
1029827bd09bSSatish Balay /* wall */
1030db4deed7SKarl Rupp if (*num == 2) {
10319371c9d4SSatish Balay num++;
10329371c9d4SSatish Balay reduce++;
10333ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10343ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1035db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */
10369371c9d4SSatish Balay num++;
10379371c9d4SSatish Balay reduce++;
10383ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10393ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10403ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
10413ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1042db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */
10439371c9d4SSatish Balay num++;
10449371c9d4SSatish Balay reduce++;
10453ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10463ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10473ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
10483ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
10493ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
10503ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1051db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D */
1052827bd09bSSatish Balay num++;
10533ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1054827bd09bSSatish Balay
1055827bd09bSSatish Balay map = *reduce;
10563ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1057827bd09bSSatish Balay
1058827bd09bSSatish Balay reduce++;
1059827bd09bSSatish Balay }
1060827bd09bSSatish Balay }
10613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1062827bd09bSSatish Balay }
1063827bd09bSSatish Balay
10647b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt step)1065d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1066d71ae5a4SJacob Faibussowitsch {
106752f87cdaSBarry Smith PetscInt *num, *map, **reduce;
1068a501084fSBarry Smith PetscScalar *base;
1069db4deed7SKarl Rupp
10703fdc5746SBarry Smith PetscFunctionBegin;
1071827bd09bSSatish Balay num = gs->num_gop_local_reduce;
1072827bd09bSSatish Balay reduce = gs->gop_local_reduce;
1073db4deed7SKarl Rupp while ((map = *reduce++)) {
1074827bd09bSSatish Balay base = vals + map[0] * step;
1075827bd09bSSatish Balay
1076827bd09bSSatish Balay /* wall */
1077db4deed7SKarl Rupp if (*num == 2) {
1078827bd09bSSatish Balay num++;
10793ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1080db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */
1081827bd09bSSatish Balay num++;
10823ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10833ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1084db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */
1085827bd09bSSatish Balay num++;
10863ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
10873ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
10883ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
1089db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/
1090827bd09bSSatish Balay num++;
10913ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1092827bd09bSSatish Balay }
1093827bd09bSSatish Balay }
10943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1095827bd09bSSatish Balay }
1096827bd09bSSatish Balay
10977b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec_local_out(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt step)1098d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1099d71ae5a4SJacob Faibussowitsch {
110052f87cdaSBarry Smith PetscInt *num, *map, **reduce;
1101a501084fSBarry Smith PetscScalar *base;
1102827bd09bSSatish Balay
11033fdc5746SBarry Smith PetscFunctionBegin;
1104827bd09bSSatish Balay num = gs->num_gop_local_reduce;
1105827bd09bSSatish Balay reduce = gs->gop_local_reduce;
1106db4deed7SKarl Rupp while ((map = *reduce++)) {
1107827bd09bSSatish Balay base = vals + map[0] * step;
1108827bd09bSSatish Balay
1109827bd09bSSatish Balay /* wall */
1110db4deed7SKarl Rupp if (*num == 2) {
1111827bd09bSSatish Balay num++;
11123ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1113db4deed7SKarl Rupp } else if (*num == 3) { /* corner shared by three elements */
1114827bd09bSSatish Balay num++;
11153ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
11163ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1117db4deed7SKarl Rupp } else if (*num == 4) { /* corner shared by four elements */
1118827bd09bSSatish Balay num++;
11193ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
11203ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
11213ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
1122db4deed7SKarl Rupp } else { /* general case ... odd geoms ... 3D*/
1123827bd09bSSatish Balay num++;
11243ba16761SJacob Faibussowitsch while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1125827bd09bSSatish Balay }
1126827bd09bSSatish Balay }
11273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1128827bd09bSSatish Balay }
1129827bd09bSSatish Balay
11307b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id * gs,PetscScalar * in_vals,PetscInt step)1131d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step)
1132d71ae5a4SJacob Faibussowitsch {
1133a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
113452f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
113552f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes;
1136827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1137827bd09bSSatish Balay MPI_Status status;
11380805154bSBarry Smith PetscBLASInt i1 = 1, dstep;
1139827bd09bSSatish Balay
11403fdc5746SBarry Smith PetscFunctionBegin;
1141a501084fSBarry Smith /* strip and load s */
1142827bd09bSSatish Balay msg_list = list = gs->pair_list;
1143827bd09bSSatish Balay msg_size = size = gs->msg_sizes;
1144827bd09bSSatish Balay msg_nodes = nodes = gs->node_list;
1145827bd09bSSatish Balay iptr = pw = gs->pw_elm_list;
1146827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals;
1147827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in;
1148827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out;
1149827bd09bSSatish Balay dptr2 = gs->out;
1150827bd09bSSatish Balay in1 = in2 = gs->in;
1151827bd09bSSatish Balay
1152827bd09bSSatish Balay /* post the receives */
1153827bd09bSSatish Balay /* msg_nodes=nodes; */
1154db4deed7SKarl Rupp do {
1155827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1156827bd09bSSatish Balay second one *list and do list++ afterwards */
1157458b0db5SMartin Diehl PetscCallMPI(MPIU_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
11589371c9d4SSatish Balay list++;
11599371c9d4SSatish Balay msg_ids_in++;
1160827bd09bSSatish Balay in1 += *size++ * step;
11612fa5cd67SKarl Rupp } while (*++msg_nodes);
1162827bd09bSSatish Balay msg_nodes = nodes;
1163827bd09bSSatish Balay
1164827bd09bSSatish Balay /* load gs values into in out gs buffers */
1165db4deed7SKarl Rupp while (*iptr >= 0) {
11663ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step));
1167827bd09bSSatish Balay dptr3 += step;
1168827bd09bSSatish Balay iptr++;
1169827bd09bSSatish Balay }
1170827bd09bSSatish Balay
1171827bd09bSSatish Balay /* load out buffers and post the sends */
1172db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) {
1173827bd09bSSatish Balay dptr3 = dptr2;
1174db4deed7SKarl Rupp while (*iptr >= 0) {
11753ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step));
1176827bd09bSSatish Balay dptr2 += step;
1177827bd09bSSatish Balay iptr++;
1178827bd09bSSatish Balay }
1179458b0db5SMartin Diehl PetscCallMPI(MPIU_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
11809371c9d4SSatish Balay msg_size++;
11819371c9d4SSatish Balay msg_list++;
11829371c9d4SSatish Balay msg_ids_out++;
1183827bd09bSSatish Balay }
1184827bd09bSSatish Balay
1185827bd09bSSatish Balay /* tree */
11863ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step));
1187827bd09bSSatish Balay
1188827bd09bSSatish Balay /* process the received data */
1189827bd09bSSatish Balay msg_nodes = nodes;
1190a501084fSBarry Smith while ((iptr = *nodes++)) {
1191a501084fSBarry Smith PetscScalar d1 = 1.0;
1192db4deed7SKarl Rupp
1193827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */
1194827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */
11959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status));
11969182e22cSBarry Smith ids_in++;
1197a501084fSBarry Smith while (*iptr >= 0) {
11989566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep));
1199792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1));
1200827bd09bSSatish Balay in2 += step;
1201827bd09bSSatish Balay iptr++;
1202827bd09bSSatish Balay }
1203827bd09bSSatish Balay }
1204827bd09bSSatish Balay
1205827bd09bSSatish Balay /* replace vals */
1206db4deed7SKarl Rupp while (*pw >= 0) {
12073ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step));
1208827bd09bSSatish Balay dptr1 += step;
1209827bd09bSSatish Balay pw++;
1210827bd09bSSatish Balay }
1211827bd09bSSatish Balay
1212827bd09bSSatish Balay /* clear isend message handles */
1213827bd09bSSatish Balay /* This changed for clarity though it could be the same */
1214db4deed7SKarl Rupp
1215827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */
1216827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */
12172fa5cd67SKarl Rupp while (*msg_nodes++) {
12189566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status));
12192fa5cd67SKarl Rupp ids_out++;
12202fa5cd67SKarl Rupp }
12213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1222827bd09bSSatish Balay }
1223827bd09bSSatish Balay
12247b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt step)1225d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1226d71ae5a4SJacob Faibussowitsch {
122752f87cdaSBarry Smith PetscInt size, *in, *out;
1228a501084fSBarry Smith PetscScalar *buf, *work;
122952f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0};
1230a501084fSBarry Smith PetscBLASInt i1 = 1;
1231c5df96a5SBarry Smith PetscBLASInt dstep;
1232827bd09bSSatish Balay
12333fdc5746SBarry Smith PetscFunctionBegin;
1234827bd09bSSatish Balay /* copy over to local variables */
1235827bd09bSSatish Balay in = gs->tree_map_in;
1236827bd09bSSatish Balay out = gs->tree_map_out;
1237827bd09bSSatish Balay buf = gs->tree_buf;
1238827bd09bSSatish Balay work = gs->tree_work;
1239827bd09bSSatish Balay size = gs->tree_nel * step;
1240827bd09bSSatish Balay
1241827bd09bSSatish Balay /* zero out collection buffer */
12423ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size));
1243827bd09bSSatish Balay
1244827bd09bSSatish Balay /* copy over my contributions */
1245db4deed7SKarl Rupp while (*in >= 0) {
12469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep));
1247792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1));
1248827bd09bSSatish Balay }
1249827bd09bSSatish Balay
1250827bd09bSSatish Balay /* perform fan in/out on full buffer */
1251b1c944f5SJed Brown /* must change PCTFS_grop to handle the blas */
12523ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop(buf, work, size, op));
1253827bd09bSSatish Balay
1254827bd09bSSatish Balay /* reset */
1255827bd09bSSatish Balay in = gs->tree_map_in;
1256827bd09bSSatish Balay out = gs->tree_map_out;
1257827bd09bSSatish Balay
1258827bd09bSSatish Balay /* get the portion of the results I need */
1259db4deed7SKarl Rupp while (*in >= 0) {
12609566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(step, &dstep));
1261792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1));
1262827bd09bSSatish Balay }
12633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1264827bd09bSSatish Balay }
1265827bd09bSSatish Balay
12667b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_hc(PCTFS_gs_id * gs,PetscScalar * vals,const char * op,PetscInt dim)1267d71ae5a4SJacob Faibussowitsch PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim)
1268d71ae5a4SJacob Faibussowitsch {
12693fdc5746SBarry Smith PetscFunctionBegin;
1270827bd09bSSatish Balay switch (*op) {
1271d71ae5a4SJacob Faibussowitsch case '+':
12723ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1273d71ae5a4SJacob Faibussowitsch break;
1274827bd09bSSatish Balay default:
12759566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0]));
12769566063dSJacob Faibussowitsch PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n"));
12773ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1278827bd09bSSatish Balay break;
1279827bd09bSSatish Balay }
12803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1281827bd09bSSatish Balay }
1282827bd09bSSatish Balay
12837b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_plus_hc(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt dim)1284d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1285d71ae5a4SJacob Faibussowitsch {
12863fdc5746SBarry Smith PetscFunctionBegin;
1287827bd09bSSatish Balay /* if there's nothing to do return */
12883ba16761SJacob Faibussowitsch if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS);
1289827bd09bSSatish Balay
1290827bd09bSSatish Balay /* can't do more dimensions then exist */
1291b1c944f5SJed Brown dim = PetscMin(dim, PCTFS_i_log2_num_nodes);
1292827bd09bSSatish Balay
1293827bd09bSSatish Balay /* local only operations!!! */
12943ba16761SJacob Faibussowitsch if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals));
1295827bd09bSSatish Balay
1296827bd09bSSatish Balay /* if intersection tree/pairwise and local isn't empty */
1297db4deed7SKarl Rupp if (gs->num_local_gop) {
12983ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals));
1299827bd09bSSatish Balay
1300827bd09bSSatish Balay /* pairwise will do tree inside ... */
13013ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */
13023ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1303827bd09bSSatish Balay
13043ba16761SJacob Faibussowitsch PetscCall(PCTFS_gs_gop_local_out(gs, vals));
1305db4deed7SKarl Rupp } else { /* if intersection tree/pairwise and local is empty */
1306827bd09bSSatish Balay /* pairwise will do tree inside */
13073ba16761SJacob Faibussowitsch if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */
13083ba16761SJacob Faibussowitsch else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1309827bd09bSSatish Balay }
13103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1311827bd09bSSatish Balay }
1312827bd09bSSatish Balay
13137b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id * gs,PetscScalar * in_vals,PetscInt dim)1314d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim)
1315d71ae5a4SJacob Faibussowitsch {
1316a501084fSBarry Smith PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
131752f87cdaSBarry Smith PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
131852f87cdaSBarry Smith PetscInt *pw, *list, *size, **nodes;
1319827bd09bSSatish Balay MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1320827bd09bSSatish Balay MPI_Status status;
132152f87cdaSBarry Smith PetscInt i, mask = 1;
1322827bd09bSSatish Balay
13233fdc5746SBarry Smith PetscFunctionBegin;
13249371c9d4SSatish Balay for (i = 1; i < dim; i++) {
13259371c9d4SSatish Balay mask <<= 1;
13269371c9d4SSatish Balay mask++;
13279371c9d4SSatish Balay }
1328827bd09bSSatish Balay
1329a501084fSBarry Smith /* strip and load s */
1330827bd09bSSatish Balay msg_list = list = gs->pair_list;
1331827bd09bSSatish Balay msg_size = size = gs->msg_sizes;
1332827bd09bSSatish Balay msg_nodes = nodes = gs->node_list;
1333827bd09bSSatish Balay iptr = pw = gs->pw_elm_list;
1334827bd09bSSatish Balay dptr1 = dptr3 = gs->pw_vals;
1335827bd09bSSatish Balay msg_ids_in = ids_in = gs->msg_ids_in;
1336827bd09bSSatish Balay msg_ids_out = ids_out = gs->msg_ids_out;
1337827bd09bSSatish Balay dptr2 = gs->out;
1338827bd09bSSatish Balay in1 = in2 = gs->in;
1339827bd09bSSatish Balay
1340827bd09bSSatish Balay /* post the receives */
1341827bd09bSSatish Balay /* msg_nodes=nodes; */
1342db4deed7SKarl Rupp do {
1343827bd09bSSatish Balay /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1344827bd09bSSatish Balay second one *list and do list++ afterwards */
1345db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) {
1346458b0db5SMartin Diehl PetscCallMPI(MPIU_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
13479371c9d4SSatish Balay list++;
13489371c9d4SSatish Balay msg_ids_in++;
13499371c9d4SSatish Balay in1 += *size++;
13509371c9d4SSatish Balay } else {
13519371c9d4SSatish Balay list++;
13529371c9d4SSatish Balay size++;
13539371c9d4SSatish Balay }
13542fa5cd67SKarl Rupp } while (*++msg_nodes);
1355827bd09bSSatish Balay
1356827bd09bSSatish Balay /* load gs values into in out gs buffers */
13572fa5cd67SKarl Rupp while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1358827bd09bSSatish Balay
1359827bd09bSSatish Balay /* load out buffers and post the sends */
1360827bd09bSSatish Balay msg_nodes = nodes;
1361827bd09bSSatish Balay list = msg_list;
1362db4deed7SKarl Rupp while ((iptr = *msg_nodes++)) {
1363db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) {
1364827bd09bSSatish Balay dptr3 = dptr2;
13652fa5cd67SKarl Rupp while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1366827bd09bSSatish Balay /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1367827bd09bSSatish Balay /* is msg_ids_out++ correct? */
1368458b0db5SMartin Diehl PetscCallMPI(MPIU_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
13699371c9d4SSatish Balay msg_size++;
13709371c9d4SSatish Balay list++;
13719371c9d4SSatish Balay msg_ids_out++;
13729371c9d4SSatish Balay } else {
13739371c9d4SSatish Balay list++;
13749371c9d4SSatish Balay msg_size++;
13759371c9d4SSatish Balay }
1376827bd09bSSatish Balay }
1377827bd09bSSatish Balay
1378827bd09bSSatish Balay /* do the tree while we're waiting */
13793ba16761SJacob Faibussowitsch if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim));
1380827bd09bSSatish Balay
1381827bd09bSSatish Balay /* process the received data */
1382827bd09bSSatish Balay msg_nodes = nodes;
1383827bd09bSSatish Balay list = msg_list;
1384db4deed7SKarl Rupp while ((iptr = *nodes++)) {
1385db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*list | mask)) {
1386827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */
1387827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */
13889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_in, &status));
13899182e22cSBarry Smith ids_in++;
13902fa5cd67SKarl Rupp while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1391827bd09bSSatish Balay }
1392827bd09bSSatish Balay list++;
1393827bd09bSSatish Balay }
1394827bd09bSSatish Balay
1395827bd09bSSatish Balay /* replace vals */
13962fa5cd67SKarl Rupp while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1397827bd09bSSatish Balay
1398827bd09bSSatish Balay /* clear isend message handles */
1399827bd09bSSatish Balay /* This changed for clarity though it could be the same */
1400db4deed7SKarl Rupp while (*msg_nodes++) {
1401db4deed7SKarl Rupp if ((PCTFS_my_id | mask) == (*msg_list | mask)) {
1402827bd09bSSatish Balay /* Should I check the return value of MPI_Wait() or status? */
1403827bd09bSSatish Balay /* Can this loop be replaced by a call to MPI_Waitall()? */
14049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Wait(ids_out, &status));
14059182e22cSBarry Smith ids_out++;
1406827bd09bSSatish Balay }
1407827bd09bSSatish Balay msg_list++;
1408827bd09bSSatish Balay }
14093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1410827bd09bSSatish Balay }
1411827bd09bSSatish Balay
14127b1ae94cSBarry Smith /******************************************************************************/
PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id * gs,PetscScalar * vals,PetscInt dim)1413d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1414d71ae5a4SJacob Faibussowitsch {
141552f87cdaSBarry Smith PetscInt size;
141652f87cdaSBarry Smith PetscInt *in, *out;
1417a501084fSBarry Smith PetscScalar *buf, *work;
141852f87cdaSBarry Smith PetscInt op[] = {GL_ADD, 0};
1419827bd09bSSatish Balay
14203fdc5746SBarry Smith PetscFunctionBegin;
1421827bd09bSSatish Balay in = gs->tree_map_in;
1422827bd09bSSatish Balay out = gs->tree_map_out;
1423827bd09bSSatish Balay buf = gs->tree_buf;
1424827bd09bSSatish Balay work = gs->tree_work;
1425827bd09bSSatish Balay size = gs->tree_nel;
1426827bd09bSSatish Balay
14273ba16761SJacob Faibussowitsch PetscCall(PCTFS_rvec_zero(buf, size));
1428827bd09bSSatish Balay
14292fa5cd67SKarl Rupp while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1430827bd09bSSatish Balay
1431827bd09bSSatish Balay in = gs->tree_map_in;
1432827bd09bSSatish Balay out = gs->tree_map_out;
1433827bd09bSSatish Balay
14343ba16761SJacob Faibussowitsch PetscCall(PCTFS_grop_hc(buf, work, size, op, dim));
1435827bd09bSSatish Balay
14362fa5cd67SKarl Rupp while (*in >= 0) *(vals + *in++) = *(buf + *out++);
14373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1438827bd09bSSatish Balay }
1439