xref: /petsc/src/vec/is/utils/psort.c (revision f97672e55eacc8688507b9471cd7ec2664d7f203)
1 
2 #include <petsc/private/petscimpl.h>
3 #include <petscis.h> /*I "petscis.h" I*/
4 
5 /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm
 *
 * Merge the bitonic sequence distributed over ranks [rankStart, rankEnd) so the keys become
 * sorted ascending (forward == PETSC_TRUE) or descending across those ranks.  Every
 * participating rank is assumed to hold the same number of keys n (true for the (P - 1)-entry
 * pivot arrays this file applies it to); buffer is scratch space for at least n entries.
 * Collective over the ranks in [rankStart, rankEnd). */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt       diff;
  PetscInt       split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    /* single rank left: finish with a local sort in the requested direction */
    if (forward) {
      PetscCall(PetscSortInt((PetscInt) n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt) n, keys));
    }
    PetscFunctionReturn(0);
  }
  /* split = largest power of 2 strictly less than diff (handles non-power-of-2 rank counts) */
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    /* compare-exchange with the partner rank: swap whole key arrays, then the "low" side keeps
     * the elementwise minima and the "high" side the elementwise maxima (sides swap roles when
     * sorting in reverse) */
    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
      }
    } else {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
      }
    }
  }
  /* divide and conquer: recurse on whichever half this rank belongs to */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(0);
}
52 
53 /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
54 static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
55 {
56   PetscInt       diff;
57   PetscInt       mid;
58 
59   PetscFunctionBegin;
60   diff = rankEnd - rankStart;
61   if (diff <= 0) PetscFunctionReturn(0);
62   if (diff == 1) {
63     if (forward) {
64       PetscCall(PetscSortInt(n, keys));
65     } else {
66       PetscCall(PetscSortReverseInt(n, keys));
67     }
68     PetscFunctionReturn(0);
69   }
70   mid = rankStart + diff / 2;
71   /* divide and conquer */
72   if (rank < mid) {
73     PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward));
74   } else {
75     PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
76   }
77   /* bitonic merge */
78   PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
79   PetscFunctionReturn(0);
80 }
81 
82 static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
83 {
84   PetscMPIInt size, rank, tag, mpin;
85   PetscInt       *buffer;
86 
87   PetscFunctionBegin;
88   PetscValidIntPointer(keys, 3);
89   PetscCall(PetscCommGetNewTag(comm, &tag));
90   PetscCallMPI(MPI_Comm_size(comm, &size));
91   PetscCallMPI(MPI_Comm_rank(comm, &rank));
92   PetscCall(PetscMPIIntCast(n, &mpin));
93   PetscCall(PetscMalloc1(n, &buffer));
94   PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
95   PetscCall(PetscFree(buffer));
96   PetscFunctionReturn(0);
97 }
98 
/* Select P - 1 global pivots that approximately split the globally sorted data according to
 * the desired output layout mapout.  Each rank proposes P - 1 pivots sampled from its locally
 * sorted keysin; the proposals are sorted in parallel with bitonic sort and an evenly spaced
 * subset of P - 1 of them is allgathered.  On exit *outpivots points to an ascending array of
 * the selected pivots, to be freed by the caller.  Collective on mapin->comm. */
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt    size, rank;
  PetscInt       *pivots, *finalpivots, i;
  PetscInt       non_empty, my_first, count;
  PetscMPIInt    *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
      index = PetscMin(index, (mapin->n - 1));
      index = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted,mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  /* NOTE(review): the PETSC_MAX_INT placeholders above were inserted for ranks with
   * mapin->n == 0, yet Z is counted from the *output* layout's ranges here -- confirm that the
   * two layouts always share the same set of empty ranks, otherwise this looks like it should
   * test mapin->range instead */
  non_empty = size;
  for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    /* walk the P - 1 selected global sample indices, counting how many land on each rank of the
     * sorted pivot array ((P - 1) entries per rank), and remember the local index of the first
     * sample owned by this rank */
    for (i = 0; i < size - 1; i++) {
      size_t sample = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
        my_first = (PetscInt) (sample - sample_rank * (size - 1));
      }
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  /* samples owned by one rank are spaced non_empty apart in its local pivot array; pad with
   * "infinity" so every rank contributes exactly max_keys_per entries to the allgather */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  /* compact the gathered array in place, dropping each rank's padding entries */
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) {
        finalpivots[count++] = finalpivots[i * max_keys_per + j];
      }
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(0);
}
176 
/* Redistribute the globally sorted array (n entries on this rank, in arrayin) into the layout
 * described by map, writing this rank's map->n entries into arrayout.  Because the data is
 * already globally sorted, each destination rank's slice is a contiguous run of the global
 * array spanning at most a few source ranks.  Protocol: every destination posts ONE wildcard
 * receive ("first" message) that is sent by the source rank owning the *start* of the
 * destination's range; any remaining entries are then drained from the consecutively following
 * source ranks via "second" messages.  Collective on map->comm. */
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  /* prefix-sum the local counts: this rank's data covers global indices [myOffset, nextOffset) */
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) {
    /* wildcard receive for the "first" message; only the source owning map->range[rank] sends it */
    PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  }
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    /* overlap of this rank's data [myOffset, nextOffset) with destination i's range */
    oStart = PetscMax(myOffset, map->range[i]);
    oEnd   = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      /* this (empty) rank sits strictly inside destination i's range; sending a zero-length
       * message keeps the destination's consecutive-sender drain loop below from stalling */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    /* the first message establishes which source rank starts our range and how much it supplied */
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  /* drain the remainder from the ranks immediately following the first sender, in order */
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(0);
}
244 
/* Distributed samplesort driver: sort locally, pick P - 1 global pivots, repartition the keys
 * by all-to-all exchange so rank i holds keys in pivot bucket i, sort the received keys
 * locally (the data is then globally sorted), and finally redistribute into the mapout layout,
 * writing this rank's result into keysout.  keysin is overwritten by the local sort.
 * Collective on mapin->comm; assumed to run with more than one rank. */
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt    size, rank;
  PetscInt       *pivots = NULL, *buffer;
  PetscInt       i, j;
  PetscMPIInt    *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  /* strict < comparison: keys equal to pivots[i] fall into bucket i + 1 or later */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i] = prev;
    keys_per_snd[i] = j - prev;
  }
  /* the last bucket takes everything at or above the final pivot */
  offsets_snd[size - 1] = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size] = mapin->n;
  /* get the incoming sizes */
  /* counts are PetscMPIInt, hence MPI_INT rather than MPIU_INT here */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) {
    offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
  }
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted,mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(0);
}
300 
301 /*@
302   PetscParallelSortInt - Globally sort a distributed array of integers
303 
304   Collective
305 
306   Input Parameters:
307 + mapin - PetscLayout describing the distribution of the input keys
308 . mapout - PetscLayout describing the distribution of the output keys
- keysin - the input array of integers to be sorted
310 
311   Output Parameter:
312 . keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.
313 
314   Level: developer
315 
316   Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
317 
318   - sorts locally
319   - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
  - using the pivots to repartition the keys by all-to-all exchange
321   - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
322   - redistributing to match the mapout layout
323 
324   If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
325 
326 .seealso: `PetscParallelSortedInt()`
327 @*/
328 PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
329 {
330   PetscMPIInt    size;
331   PetscMPIInt    result;
332   PetscInt       *keysincopy = NULL;
333 
334   PetscFunctionBegin;
335   PetscValidPointer(mapin, 1);
336   PetscValidPointer(mapout, 2);
337   PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
338   PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT,mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
339   PetscCall(PetscLayoutSetUp(mapin));
340   PetscCall(PetscLayoutSetUp(mapout));
341   if (mapin->n) PetscValidIntPointer(keysin, 3);
342   if (mapout->n) PetscValidIntPointer(keysout, 4);
343   PetscCheck(mapin->N == mapout->N,mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
344   PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
345   if (size == 1) {
346     if (keysout != keysin) {
347       PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
348     }
349     PetscCall(PetscSortInt(mapout->n, keysout));
350     if (size == 1) PetscFunctionReturn(0);
351   }
352   if (keysout != keysin) {
353     PetscCall(PetscMalloc1(mapin->n, &keysincopy));
354     PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
355     keysin = keysincopy;
356   }
357   PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
358 #if defined(PETSC_USE_DEBUG)
359   {
360     PetscBool sorted;
361 
362     PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
363     PetscCheck(sorted,mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
364   }
365 #endif
366   PetscCall(PetscFree(keysincopy));
367   PetscFunctionReturn(0);
368 }
369