/***********************************comm.c*************************************

Author: Henry M. Tufo III

e-mail: hmt@cs.brown.edu

snail-mail:
Division of Applied Mathematics
Brown University
Providence, RI 02912

Last Modification:
11.21.97
***********************************comm.c*************************************/
#include <../src/ksp/pc/impls/tfs/tfs.h>

/* global program control variables - explicitly exported */
PetscMPIInt PCTFS_my_id            = 0;
PetscMPIInt PCTFS_num_nodes        = 1;
PetscMPIInt PCTFS_floor_num_nodes  = 0;
PetscMPIInt PCTFS_i_log2_num_nodes = 0;

/* global program control variables */
static PetscInt p_init = 0;
static PetscInt modfl_num_nodes; /* PCTFS_num_nodes - PCTFS_floor_num_nodes, the ranks beyond the power-of-2 floor */
static PetscInt edge_not_pow_2;  /* partner rank for the non-power-of-2 collapse/expand steps, 0 if none */

/* hypercube neighbor id for each dimension; generously sized at sizeof(PetscInt)*32 entries */
static PetscInt edge_node[sizeof(PetscInt) * 32];

/***********************************comm.c*************************************/
PetscErrorCode PCTFS_comm_init(void)
{
  PetscFunctionBegin;
  if (p_init++) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &PCTFS_num_nodes));
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &PCTFS_my_id));

  PetscCheck(PCTFS_num_nodes <= (INT_MAX >> 1), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Can't have more than INT_MAX/2 nodes!!!");

  PetscCall(PCTFS_ivec_zero((PetscInt *)edge_node, sizeof(PetscInt) * 32));

  /* largest power of 2 <= PCTFS_num_nodes and its log; record the hypercube neighbor per dimension */
  PCTFS_floor_num_nodes  = 1;
  PCTFS_i_log2_num_nodes = modfl_num_nodes = 0;
  while (PCTFS_floor_num_nodes <= PCTFS_num_nodes) {
    edge_node[PCTFS_i_log2_num_nodes] = PCTFS_my_id ^ PCTFS_floor_num_nodes;
    PCTFS_floor_num_nodes <<= 1;
    PCTFS_i_log2_num_nodes++;
  }

  PCTFS_i_log2_num_nodes--;
  PCTFS_floor_num_nodes >>= 1;
  modfl_num_nodes = (PCTFS_num_nodes - PCTFS_floor_num_nodes);

  /* pair each rank beyond the power-of-2 floor with a low rank for the collapse/expand steps */
  if ((PCTFS_my_id > 0) && (PCTFS_my_id <= modfl_num_nodes)) edge_not_pow_2 = ((PCTFS_my_id | PCTFS_floor_num_nodes) - 1);
  else if (PCTFS_my_id >= PCTFS_floor_num_nodes) edge_not_pow_2 = ((PCTFS_my_id ^ PCTFS_floor_num_nodes) + 1);
  else edge_not_pow_2 = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
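
/*
   Worked example (editorial sketch, not in the original source): with
   PCTFS_num_nodes = 6 the loop above leaves PCTFS_floor_num_nodes = 4,
   PCTFS_i_log2_num_nodes = 2 and modfl_num_nodes = 2.  Ranks 4 and 5 lie beyond
   the power-of-2 floor and are paired with ranks 1 and 2 (edge_not_pow_2 is 1
   on rank 4, 2 on rank 5, 4 on rank 1, and 5 on rank 2); ranks 0 and 3 get
   edge_not_pow_2 = 0 and take no part in the collapse/expand steps.
*/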

/***********************************comm.c*************************************/
PetscErrorCode PCTFS_giop(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs)
{
  PetscInt   mask, edge;
  PetscInt   type, dest;
  vfp        fp;
  MPI_Status status;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  PetscCheck(vals && work && oprs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop() :: vals=%p, work=%p, oprs=%p", (void *)vals, (void *)work, (void *)oprs);

  /* non-uniform should have at least two entries */
  PetscCheck(!(oprs[0] == NON_UNIFORM) || !(n < 2), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init) PetscCall(PCTFS_comm_init());

  /* if there's nothing to do return */
  if ((PCTFS_num_nodes < 2) || (!n)) PetscFunctionReturn(PETSC_SUCCESS);

  /* a negative number of items to send ==> fatal */
  PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop() :: n=%" PetscInt_FMT "<0?", n);

  /* advance to list of n operations for custom */
  if ((type = oprs[0]) == NON_UNIFORM) oprs++;

  /* major league hack */
  PetscCheck(fp = (vfp)PCTFS_ivec_fct_addr(type), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop() :: Could not retrieve function pointer!");

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2) {
    if (PCTFS_my_id >= PCTFS_floor_num_nodes) {
      PetscCallMPI(MPI_Send(vals, n, MPIU_INT, edge_not_pow_2, MSGTAG0 + PCTFS_my_id, MPI_COMM_WORLD));
    } else {
      PetscCallMPI(MPI_Recv(work, n, MPIU_INT, MPI_ANY_SOURCE, MSGTAG0 + edge_not_pow_2, MPI_COMM_WORLD, &status));
      PetscCall((*fp)(vals, work, n, oprs));
    }
  }

  /* implement the mesh fan in/out exchange algorithm */
  if (PCTFS_my_id < PCTFS_floor_num_nodes) {
    /* fan in: the lower rank of each pair accumulates along every hypercube dimension */
    for (mask = 1, edge = 0; edge < PCTFS_i_log2_num_nodes; edge++, mask <<= 1) {
      dest = PCTFS_my_id ^ mask;
      if (PCTFS_my_id > dest) {
        PetscCallMPI(MPI_Send(vals, n, MPIU_INT, dest, MSGTAG2 + PCTFS_my_id, MPI_COMM_WORLD));
      } else {
        PetscCallMPI(MPI_Recv(work, n, MPIU_INT, MPI_ANY_SOURCE, MSGTAG2 + dest, MPI_COMM_WORLD, &status));
        PetscCall((*fp)(vals, work, n, oprs));
      }
    }

    /* fan out: walk the dimensions in reverse, broadcasting the result back down */
    mask = PCTFS_floor_num_nodes >> 1;
    for (edge = 0; edge < PCTFS_i_log2_num_nodes; edge++, mask >>= 1) {
      if (PCTFS_my_id % mask) continue;

      dest = PCTFS_my_id ^ mask;
      if (PCTFS_my_id < dest) {
        PetscCallMPI(MPI_Send(vals, n, MPIU_INT, dest, MSGTAG4 + PCTFS_my_id, MPI_COMM_WORLD));
      } else {
        PetscCallMPI(MPI_Recv(vals, n, MPIU_INT, MPI_ANY_SOURCE, MSGTAG4 + dest, MPI_COMM_WORLD, &status));
      }
    }
  }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2) {
    if (PCTFS_my_id >= PCTFS_floor_num_nodes) {
      PetscCallMPI(MPI_Recv(vals, n, MPIU_INT, MPI_ANY_SOURCE, MSGTAG5 + edge_not_pow_2, MPI_COMM_WORLD, &status));
    } else {
      PetscCallMPI(MPI_Send(vals, n, MPIU_INT, edge_not_pow_2, MSGTAG5 + PCTFS_my_id, MPI_COMM_WORLD));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
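
/*
   Usage sketch (illustrative, not in the original source): a global integer
   minimum with one entry per rank; vals is overwritten with the result on
   every rank.  GL_MIN is assumed to be one of the uniform operation codes
   recognized by PCTFS_ivec_fct_addr() (alongside GL_ADD, GL_MAX, ...).

     PetscInt val = PCTFS_my_id, work, op[1] = {GL_MIN};
     PetscCall(PCTFS_giop(&val, &work, 1, op));
     ... val == 0 on every rank afterwards
*/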

/***********************************comm.c*************************************/
PetscErrorCode PCTFS_grop(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs)
{
  PetscInt   mask, edge;
  PetscInt   type, dest;
  vfp        fp;
  MPI_Status status;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  PetscCheck(vals && work && oprs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop() :: vals=%p, work=%p, oprs=%p", (void *)vals, (void *)work, (void *)oprs);

  /* non-uniform should have at least two entries */
  PetscCheck(!(oprs[0] == NON_UNIFORM) || !(n < 2), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init) PetscCall(PCTFS_comm_init());

  /* if there's nothing to do return */
  if ((PCTFS_num_nodes < 2) || (!n)) PetscFunctionReturn(PETSC_SUCCESS);

  /* a negative number of items to send ==> fatal */
  PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop() :: n=%" PetscInt_FMT "<0?", n);

  /* advance to list of n operations for custom */
  if ((type = oprs[0]) == NON_UNIFORM) oprs++;

  PetscCheck(fp = (vfp)PCTFS_rvec_fct_addr(type), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop() :: Could not retrieve function pointer!");

  /* all msgs will be of the same length */
  /* if not a hypercube must collapse partial dim */
  if (edge_not_pow_2) {
    if (PCTFS_my_id >= PCTFS_floor_num_nodes) {
      PetscCallMPI(MPI_Send(vals, n, MPIU_SCALAR, edge_not_pow_2, MSGTAG0 + PCTFS_my_id, MPI_COMM_WORLD));
    } else {
      PetscCallMPI(MPI_Recv(work, n, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG0 + edge_not_pow_2, MPI_COMM_WORLD, &status));
      PetscCall((*fp)(vals, work, n, oprs));
    }
  }

  /* implement the mesh fan in/out exchange algorithm */
  if (PCTFS_my_id < PCTFS_floor_num_nodes) {
    for (mask = 1, edge = 0; edge < PCTFS_i_log2_num_nodes; edge++, mask <<= 1) {
      dest = PCTFS_my_id ^ mask;
      if (PCTFS_my_id > dest) {
        PetscCallMPI(MPI_Send(vals, n, MPIU_SCALAR, dest, MSGTAG2 + PCTFS_my_id, MPI_COMM_WORLD));
      } else {
        PetscCallMPI(MPI_Recv(work, n, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG2 + dest, MPI_COMM_WORLD, &status));
        PetscCall((*fp)(vals, work, n, oprs));
      }
    }

    mask = PCTFS_floor_num_nodes >> 1;
    for (edge = 0; edge < PCTFS_i_log2_num_nodes; edge++, mask >>= 1) {
      if (PCTFS_my_id % mask) continue;

      dest = PCTFS_my_id ^ mask;
      if (PCTFS_my_id < dest) {
        PetscCallMPI(MPI_Send(vals, n, MPIU_SCALAR, dest, MSGTAG4 + PCTFS_my_id, MPI_COMM_WORLD));
      } else {
        PetscCallMPI(MPI_Recv(vals, n, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG4 + dest, MPI_COMM_WORLD, &status));
      }
    }
  }

  /* if not a hypercube must expand to partial dim */
  if (edge_not_pow_2) {
    if (PCTFS_my_id >= PCTFS_floor_num_nodes) {
      PetscCallMPI(MPI_Recv(vals, n, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG5 + edge_not_pow_2, MPI_COMM_WORLD, &status));
    } else {
      PetscCallMPI(MPI_Send(vals, n, MPIU_SCALAR, edge_not_pow_2, MSGTAG5 + PCTFS_my_id, MPI_COMM_WORLD));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
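
/*
   Usage sketch (illustrative, not in the original source): NON_UNIFORM applies
   a possibly different operation to each entry; oprs[0] is NON_UNIFORM and the
   following n entries are assumed to be per-entry GL_* codes dispatched by
   PCTFS_rvec_fct_addr(NON_UNIFORM).

     PetscScalar vals[2] = {1.0, 2.0}, work[2];
     PetscInt    op[3]   = {NON_UNIFORM, GL_ADD, GL_MAX};
     PetscCall(PCTFS_grop(vals, work, 2, op));
     ... vals[0] holds the global sum, vals[1] the global max
*/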

/***********************************comm.c*************************************/
PetscErrorCode PCTFS_grop_hc(PetscScalar *vals, PetscScalar *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt   mask, edge;
  PetscInt   type, dest;
  vfp        fp;
  MPI_Status status;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  PetscCheck(vals && work && oprs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop_hc() :: vals=%p, work=%p, oprs=%p", (void *)vals, (void *)work, (void *)oprs);

  /* non-uniform should have at least two entries */
  PetscCheck(!(oprs[0] == NON_UNIFORM) || !(n < 2), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop_hc() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init) PetscCall(PCTFS_comm_init());

  /* if there's nothing to do return */
  if ((PCTFS_num_nodes < 2) || (!n) || (dim <= 0)) PetscFunctionReturn(PETSC_SUCCESS);

  /* the error msg says it all!!! */
  PetscCheck(!modfl_num_nodes, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop_hc() :: PCTFS_num_nodes not a power of 2!?!");

  /* a negative number of items to send ==> fatal */
  PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop_hc() :: n=%" PetscInt_FMT "<0?", n);

  /* can't do more dimensions than exist */
  dim = PetscMin(dim, PCTFS_i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type = oprs[0]) == NON_UNIFORM) oprs++;

  PetscCheck(fp = (vfp)PCTFS_rvec_fct_addr(type), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_grop_hc() :: Could not retrieve function pointer!");

  /* fan in over the low dim dimensions only */
  for (mask = 1, edge = 0; edge < dim; edge++, mask <<= 1) {
    dest = PCTFS_my_id ^ mask;
    if (PCTFS_my_id > dest) {
      PetscCallMPI(MPI_Send(vals, n, MPIU_SCALAR, dest, MSGTAG2 + PCTFS_my_id, MPI_COMM_WORLD));
    } else {
      PetscCallMPI(MPI_Recv(work, n, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG2 + dest, MPI_COMM_WORLD, &status));
      PetscCall((*fp)(vals, work, n, oprs));
    }
  }

  /* reposition mask at the top dimension for the fan out */
  if (edge == dim) mask >>= 1;
  else {
    while (++edge < dim) mask <<= 1;
  }

  for (edge = 0; edge < dim; edge++, mask >>= 1) {
    if (PCTFS_my_id % mask) continue;

    dest = PCTFS_my_id ^ mask;
    if (PCTFS_my_id < dest) {
      PetscCallMPI(MPI_Send(vals, n, MPIU_SCALAR, dest, MSGTAG4 + PCTFS_my_id, MPI_COMM_WORLD));
    } else {
      PetscCallMPI(MPI_Recv(vals, n, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG4 + dest, MPI_COMM_WORLD, &status));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
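
/*
   Note (editorial, derived from the loops above): the _hc variants reduce over
   the low dim hypercube dimensions only, so each block of 2^dim consecutive
   ranks ends up with an independent reduction.  With 8 ranks and dim = 2, for
   instance, ranks {0,1,2,3} and {4,5,6,7} hold two separate results afterwards.
*/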

/******************************************************************************/
PetscErrorCode PCTFS_ssgl_radd(PetscScalar *vals, PetscScalar *work, PetscInt level, PetscInt *segs)
{
  PetscInt     edge, type, dest, mask;
  PetscInt     stage_n;
  MPI_Status   status;
  PetscMPIInt *maxval, iflg;

  PetscFunctionBegin;
  /* check to make sure comm package has been initialized */
  if (!p_init) PetscCall(PCTFS_comm_init());

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  for (mask = 0, edge = 0; edge < level; edge++, mask++) {
    stage_n = (segs[level] - segs[edge]);
    if (stage_n && !(PCTFS_my_id & mask)) {
      dest = edge_node[edge];
      type = MSGTAG3 + PCTFS_my_id + (PCTFS_num_nodes * edge);
      if (PCTFS_my_id > dest) {
        PetscCallMPI(MPI_Send(vals + segs[edge], stage_n, MPIU_SCALAR, dest, type, MPI_COMM_WORLD));
      } else {
        type = type - PCTFS_my_id + dest;
        PetscCallMPI(MPI_Recv(work, stage_n, MPIU_SCALAR, MPI_ANY_SOURCE, type, MPI_COMM_WORLD, &status));
        PetscCall(PCTFS_rvec_add(vals + segs[edge], work, stage_n));
      }
    }
    mask <<= 1;
  }
  mask >>= 1;
  for (edge = 0; edge < level; edge++) {
    stage_n = (segs[level] - segs[level - 1 - edge]);
    if (stage_n && !(PCTFS_my_id & mask)) {
      dest = edge_node[level - edge - 1];
      type = MSGTAG6 + PCTFS_my_id + (PCTFS_num_nodes * edge);
      PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &iflg));
      PetscCheck(iflg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
      PetscCheck(*maxval > type, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPI_TAG_UB for your current MPI implementation is not large enough to use PCTFS");
      if (PCTFS_my_id < dest) {
        PetscCallMPI(MPI_Send(vals + segs[level - 1 - edge], stage_n, MPIU_SCALAR, dest, type, MPI_COMM_WORLD));
      } else {
        type = type - PCTFS_my_id + dest;
        PetscCallMPI(MPI_Recv(vals + segs[level - 1 - edge], stage_n, MPIU_SCALAR, MPI_ANY_SOURCE, type, MPI_COMM_WORLD, &status));
      }
    }
    mask >>= 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
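
/*
   Note (editorial, derived from the loops above): segs[] holds level+1
   monotonically increasing offsets into vals.  At fan-in stage edge the suffix
   vals[segs[edge]] .. vals[segs[level]-1] is sent to (or summed on) the
   neighbor edge_node[edge], so successively shorter suffixes travel at each
   stage; the fan-out loop returns the same suffixes in reverse order.
*/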

/***********************************comm.c*************************************/
PetscErrorCode PCTFS_giop_hc(PetscInt *vals, PetscInt *work, PetscInt n, PetscInt *oprs, PetscInt dim)
{
  PetscInt   mask, edge;
  PetscInt   type, dest;
  vfp        fp;
  MPI_Status status;

  PetscFunctionBegin;
  /* ok ... should have some data, work, and operator(s) */
  PetscCheck(vals && work && oprs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop_hc() :: vals=%p, work=%p, oprs=%p", (void *)vals, (void *)work, (void *)oprs);

  /* non-uniform should have at least two entries */
  PetscCheck(!(oprs[0] == NON_UNIFORM) || !(n < 2), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop_hc() :: non_uniform and n=0,1?");

  /* check to make sure comm package has been initialized */
  if (!p_init) PetscCall(PCTFS_comm_init());

  /* if there's nothing to do return */
  if ((PCTFS_num_nodes < 2) || (!n) || (dim <= 0)) PetscFunctionReturn(PETSC_SUCCESS);

  /* the error msg says it all!!! */
  PetscCheck(!modfl_num_nodes, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop_hc() :: PCTFS_num_nodes not a power of 2!?!");

  /* a negative number of items to send ==> fatal */
  PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop_hc() :: n=%" PetscInt_FMT "<0?", n);

  /* can't do more dimensions than exist */
  dim = PetscMin(dim, PCTFS_i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type = oprs[0]) == NON_UNIFORM) oprs++;

  PetscCheck(fp = (vfp)PCTFS_ivec_fct_addr(type), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_giop_hc() :: Could not retrieve function pointer!");

  /* fan in over the low dim dimensions only */
  for (mask = 1, edge = 0; edge < dim; edge++, mask <<= 1) {
    dest = PCTFS_my_id ^ mask;
    if (PCTFS_my_id > dest) {
      PetscCallMPI(MPI_Send(vals, n, MPIU_INT, dest, MSGTAG2 + PCTFS_my_id, MPI_COMM_WORLD));
    } else {
      PetscCallMPI(MPI_Recv(work, n, MPIU_INT, MPI_ANY_SOURCE, MSGTAG2 + dest, MPI_COMM_WORLD, &status));
      PetscCall((*fp)(vals, work, n, oprs));
    }
  }

  /* reposition mask at the top dimension for the fan out */
  if (edge == dim) mask >>= 1;
  else {
    while (++edge < dim) mask <<= 1;
  }

  for (edge = 0; edge < dim; edge++, mask >>= 1) {
    if (PCTFS_my_id % mask) continue;

    dest = PCTFS_my_id ^ mask;
    if (PCTFS_my_id < dest) {
      PetscCallMPI(MPI_Send(vals, n, MPIU_INT, dest, MSGTAG4 + PCTFS_my_id, MPI_COMM_WORLD));
    } else {
      PetscCallMPI(MPI_Recv(vals, n, MPIU_INT, MPI_ANY_SOURCE, MSGTAG4 + dest, MPI_COMM_WORLD, &status));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
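
/*
   Usage sketch (illustrative, not in the original source): on a power-of-2
   communicator, dim = 1 combines each even/odd rank pair only.

     PetscInt val = PCTFS_my_id, work, op[1] = {GL_ADD};
     PetscCall(PCTFS_giop_hc(&val, &work, 1, op, 1));
     ... ranks 2k and 2k+1 both end with val == 4k+1
*/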