#include <petsc/private/petscimpl.h>
#include <petscis.h> /*I "petscis.h" I*/

/* This is the bitonic merge that works on non-power-of-2 sizes, following http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt    diff;
  PetscMPIInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
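  /* split is the largest power of two strictly less than the number of ranks in [rankStart, rankEnd) */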
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
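  /* compare-exchange with the partner rank: exchange full local arrays and keep the element-wise
   * minima on the low-rank side and the maxima on the high-rank side when merging in ascending
   * order (reversed for a descending merge) */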
  if (partner < rankEnd) {
    PetscMPIInt i;

    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the bitonic sort that works on non-power-of-2 sizes, following http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscMPIInt diff, mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
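  /* the lower half is sorted in the opposite direction so that the two halves together form a bitonic sequence */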
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscFunctionBegin;
  PetscAssertPointer(keys, 3);
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

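/* Choose P - 1 global pivots that approximately reproduce the output layout: each rank nominates
 * P - 1 pivots from its locally sorted keys, the nominations are sorted in parallel with the
 * bitonic sort above, and evenly spaced entries of that sorted list are allgathered to every rank. */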
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list. each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_INT_MAX;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, mapin->n - 1);
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
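  /* count how many of the selected global sample indices land on each rank's slice of the
   * parallel-sorted pivots, and record the local index of this rank's first selected sample */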
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_INT_MAX;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
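  /* compact the gathered pivots in place, dropping the PETSC_INT_MAX padding contributed by
   * ranks that provided fewer than max_keys_per pivots */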
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(PETSC_SUCCESS);
}

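/* Redistribute an array that is laid out across the communicator in rank order (n entries on this
 * rank) into the distribution described by map, storing the result in arrayout.  Each rank receives
 * the start of its output range in a single message (firsttag) from whichever rank owns that first
 * entry, then drains messages (secondtag) from the consecutively following ranks until its range is
 * full; empty ranks whose offset falls strictly inside a receiver's range send zero-length messages
 * so that the drain never stalls. */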
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscCount   total;
  PetscMPIInt  nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  if (total > 0) PetscCallMPI(MPIU_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  nsecond = 0;
  nfirst  = 0;
  for (PetscMPIInt i = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPIU_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &firstreqs[nfirst++]));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++]));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++]));
    }
  }
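  /* receive this rank's output range: the first message identifies the rank that owns the start of
   * the range; keep receiving from the ranks that follow it, in order, until all `total` entries
   * have arrived */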
  if (total > 0) {
    MPI_Status  status;
    PetscMPIInt sender = -1;
    PetscCount  filled = 0;

    PetscCallMPI(MPI_Wait(&firstreqrcv, &status));
    sender = status.MPI_SOURCE;
    PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &filled));
    while (filled < total) {
      PetscCount mfilled;

      sender++;
      PetscCallMPI(MPIU_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &status));
      PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &mfilled));
      filled += mfilled;
    }
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  j = 0;
  for (PetscMPIInt i = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    PetscCall(PetscMPIIntCast(prev, &offsets_snd[i]));
    PetscCall(PetscMPIIntCast(j - prev, &keys_per_snd[i]));
  }
  PetscCall(PetscMPIIntCast(j, &offsets_snd[size - 1]));
  PetscCall(PetscMPIIntCast(mapin->n - j, &keys_per_snd[size - 1]));
  PetscCall(PetscMPIIntCast(mapin->n, &offsets_snd[size]));
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (PetscMPIInt i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the array of integers to be sorted

  Output Parameter:
. keysout - the array in which the sorted integers will be stored. If `mapin` == `mapout`, then `keysin` may be equal to `keysout`.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with a global array size of N distributed over P MPI processes, does\:
.vb
  - sorts locally
  - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
  - uses the pivots to repartition the keys with an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes the keys to match the mapout layout
.ve

  If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`.

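  Example Usage:
  A minimal sketch (the communicator `comm`, the local size `nlocal`, and the key values below are
  illustrative placeholders); every rank contributes `nlocal` unsorted keys and receives its slice
  of the globally sorted result in the same layout\:
.vb
  PetscLayout mapin, mapout;
  PetscInt   *keysin, *keysout, i;
  PetscInt    nlocal = 10;
  PetscMPIInt rank;

  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutCreateFromSizes(comm, nlocal, PETSC_DECIDE, 1, &mapin));
  PetscCall(PetscLayoutCreateFromSizes(comm, nlocal, PETSC_DECIDE, 1, &mapout));
  PetscCall(PetscMalloc2(nlocal, &keysin, nlocal, &keysout));
  for (i = 0; i < nlocal; i++) keysin[i] = (PetscInt)((rank * 7919 + i * 104729) % 1000); /* arbitrary unsorted keys */
  PetscCall(PetscParallelSortInt(mapin, mapout, keysin, keysout));
  /* keysout now holds this rank's nlocal entries of the globally sorted sequence */
  PetscCall(PetscFree2(keysin, keysout));
  PetscCall(PetscLayoutDestroy(&mapin));
  PetscCall(PetscLayoutDestroy(&mapout));
.ve
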
.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  PetscFunctionBegin;
  PetscAssertPointer(mapin, 1);
  PetscAssertPointer(mapout, 2);
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  if (mapin->n) PetscAssertPointer(keysin, 3);
  if (mapout->n) PetscAssertPointer(keysout, 4);
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    if (keysout != keysin) PetscCall(PetscArraycpy(keysout, keysin, mapin->n));
    PetscCall(PetscSortInt(mapout->n, keysout));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (keysout != keysin) {
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscArraycpy(keysincopy, keysin, mapin->n));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(PETSC_SUCCESS);
}