xref: /petsc/src/vec/is/utils/psort.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1 
2 #include <petsc/private/petscimpl.h>
3 #include <petscis.h> /*I "petscis.h" I*/
4 
5 /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
6 static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
7 {
8   PetscInt diff;
9   PetscInt split, mid, partner;
10 
11   PetscFunctionBegin;
12   diff = rankEnd - rankStart;
13   if (diff <= 0) PetscFunctionReturn(0);
14   if (diff == 1) {
15     if (forward) {
16       PetscCall(PetscSortInt((PetscInt)n, keys));
17     } else {
18       PetscCall(PetscSortReverseInt((PetscInt)n, keys));
19     }
20     PetscFunctionReturn(0);
21   }
22   split = 1;
23   while (2 * split < diff) split *= 2;
24   mid = rankStart + split;
25   if (rank < mid) {
26     partner = rank + split;
27   } else {
28     partner = rank - split;
29   }
30   if (partner < rankEnd) {
31     PetscMPIInt i;
32 
33     PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
34     if ((rank < partner) == (forward == PETSC_TRUE)) {
35       for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
36     } else {
37       for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
38     }
39   }
40   /* divide and conquer */
41   if (rank < mid) {
42     PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
43   } else {
44     PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
45   }
46   PetscFunctionReturn(0);
47 }
48 
49 /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
50 static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
51 {
52   PetscInt diff;
53   PetscInt mid;
54 
55   PetscFunctionBegin;
56   diff = rankEnd - rankStart;
57   if (diff <= 0) PetscFunctionReturn(0);
58   if (diff == 1) {
59     if (forward) {
60       PetscCall(PetscSortInt(n, keys));
61     } else {
62       PetscCall(PetscSortReverseInt(n, keys));
63     }
64     PetscFunctionReturn(0);
65   }
66   mid = rankStart + diff / 2;
67   /* divide and conquer */
68   if (rank < mid) {
69     PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
70   } else {
71     PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
72   }
73   /* bitonic merge */
74   PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
75   PetscFunctionReturn(0);
76 }
77 
78 static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
79 {
80   PetscMPIInt size, rank, tag, mpin;
81   PetscInt   *buffer;
82 
83   PetscFunctionBegin;
84   PetscValidIntPointer(keys, 3);
85   PetscCall(PetscCommGetNewTag(comm, &tag));
86   PetscCallMPI(MPI_Comm_size(comm, &size));
87   PetscCallMPI(MPI_Comm_rank(comm, &rank));
88   PetscCall(PetscMPIIntCast(n, &mpin));
89   PetscCall(PetscMalloc1(n, &buffer));
90   PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
91   PetscCall(PetscFree(buffer));
92   PetscFunctionReturn(0);
93 }
94 
/* Select the P - 1 splitter pivots for samplesort.  Each process nominates P - 1 candidate
 * pivots from its locally sorted keysin (spaced to match the desired output layout mapout),
 * all candidates are sorted in parallel with the bitonic sort, an evenly spaced subset of
 * P - 1 of them is selected, and that subset is allgathered so every process returns the
 * same pivot array in *outpivots (caller frees it with PetscFree()). */
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel: afterwards each rank holds size - 1 globally sorted candidates */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  /* keys_per[r] counts how many of the selected samples live on rank r's slice of the
   * globally sorted candidate array; my_first is this rank's local index of its first
   * selected sample (remains -1 if this rank contributes none) */
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      /* sample is a global index into the sorted candidates; each rank owns size - 1 of them */
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  /* pad every rank's contribution to max_keys_per so a fixed-size MPI_Allgather can be used */
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  /* compact the gathered array in place, dropping the PETSC_MAX_INT padding entries */
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(0);
}
169 
/* Redistribute a globally sorted array so that it matches the layout map.  On entry this
 * process holds n entries in arrayin; the global position of those entries is given by the
 * exclusive prefix sum of n over the communicator.  On exit arrayout holds the entries that
 * belong to this rank under map.  The protocol uses two tags: each receiving rank gets
 * exactly one "first" message (from the rank whose data begins exactly at the receiver's
 * range start) and then drains zero or more "second" messages from consecutively higher
 * ranks until its range is full. */
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  /* [myOffset, nextOffset) is the global index range of this rank's current entries */
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  /* post the single wildcard receive for the "first" message before any sends are issued */
  if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    /* overlap of this rank's current entries with rank i's target range */
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message: this rank's data covers the very start of rank i's range */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message: keeps the receiver's rank-by-rank drain loop in step
       * even though this rank contributes no entries to rank i */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    /* the first message identifies the lowest-ranked sender; later pieces come from
     * sender + 1, sender + 2, ... in order on the second tag */
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  /* complete all outstanding sends before releasing the request storage */
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(0);
}
235 
/* Distributed samplesort: sort locally, pick P - 1 global pivots, repartition the keys by
 * the pivots with an all-to-all exchange, sort the received keys locally (the data is then
 * globally sorted but not in the mapout distribution), and finally redistribute into
 * keysout following mapout.  NOTE(review): keysin is overwritten by the initial local
 * sort, which is why the public entry point passes a copy when keysin != keysout. */
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots:
   * bucket i receives the keys strictly less than pivots[i] not claimed by earlier buckets
   * (keys equal to a pivot go to the higher-ranked bucket) */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  /* the last bucket takes everything that remains */
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort of the received buckets; afterwards the keys are globally sorted across ranks */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(0);
}
289 
290 /*@
291   PetscParallelSortInt - Globally sort a distributed array of integers
292 
293   Collective
294 
295   Input Parameters:
296 + mapin - PetscLayout describing the distribution of the input keys
297 . mapout - PetscLayout describing the distribution of the output keys
298 - keysin - the input array of integers to be sorted (it need not be pre-sorted)
299 
300   Output Parameter:
301 . keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.
302 
303   Level: developer
304 
305   Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
306 
307   - sorts locally
308   - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
309   - using the pivots to repartition the keys by all-to-all exchange
310   - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
311   - redistributing to match the mapout layout
312 
313   If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
314 
315 .seealso: `PetscParallelSortedInt()`
316 @*/
317 PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
318 {
319   PetscMPIInt size;
320   PetscMPIInt result;
321   PetscInt   *keysincopy = NULL;
322 
323   PetscFunctionBegin;
324   PetscValidPointer(mapin, 1);
325   PetscValidPointer(mapout, 2);
326   PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
327   PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
328   PetscCall(PetscLayoutSetUp(mapin));
329   PetscCall(PetscLayoutSetUp(mapout));
330   if (mapin->n) PetscValidIntPointer(keysin, 3);
331   if (mapout->n) PetscValidIntPointer(keysout, 4);
332   PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
333   PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
334   if (size == 1) {
335     if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
336     PetscCall(PetscSortInt(mapout->n, keysout));
337     if (size == 1) PetscFunctionReturn(0);
338   }
339   if (keysout != keysin) {
340     PetscCall(PetscMalloc1(mapin->n, &keysincopy));
341     PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
342     keysin = keysincopy;
343   }
344   PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
345 #if defined(PETSC_USE_DEBUG)
346   {
347     PetscBool sorted;
348 
349     PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
350     PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
351   }
352 #endif
353   PetscCall(PetscFree(keysincopy));
354   PetscFunctionReturn(0);
355 }
356