/* petsc/src/vec/is/utils/psort.c */
#include <petsc/private/petscimpl.h>
#include <petscis.h> /*I "petscis.h" I*/

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt((PetscInt)n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt)n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
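  /* split is the greatest power of two strictly less than the number of ranks in [rankStart, rankEnd) */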
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
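    /* compare-exchange: for an ascending (forward) sort the lower rank keeps the element-wise minima and the
     * higher rank keeps the maxima; the roles are reversed for a descending sort */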
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
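/* Each rank holds n keys; the ranks in [rankStart, rankEnd) recursively sort their lower and upper halves in
 * opposite directions, so that together the halves form a bitonic sequence, and then run the bitonic merge
 * above to finish in the requested direction */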
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscFunctionBegin;
  PetscAssertPointer(keys, 3);
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

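/* Choose P - 1 splitters for the samplesort: every rank proposes P - 1 pivots drawn from its locally sorted
 * keys at positions matching the target output layout, the proposals are sorted in parallel with the bitonic
 * sort above, evenly spaced survivors are kept, and those survivors are allgathered so that every rank ends
 * up with the same P - 1 pivots */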
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
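  /* the parallel bitonic sort above leaves (size - 1) sorted pivot candidates on every rank, so the candidate
   * with global index g sits on rank g / (size - 1) at local offset g % (size - 1); count how many of the
   * selected candidates (global indices Z - 1, 2*Z - 1, ...) land on each rank and remember the local offset
   * of the first one selected here */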
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(PETSC_SUCCESS);
}

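/* Redistribute an array that is distributed contiguously in rank order (n entries on this rank) so that it
 * matches the layout described by map, without changing the global ordering of the entries */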
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
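  /* every rank posts one wildcard receive for the start of its output range: the rank that owns that first
   * entry answers with firsttag, and the remaining entries are then pulled from the ranks that follow it,
   * in order, with secondtag */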
  if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
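  /* keysin is sorted locally, so the keys bound for rank i (those below pivots[i] that have not already been
   * assigned to a lower rank) form a contiguous block; record its offset and length */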
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin  - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the array of integers to sort (it does not need to be pre-sorted)

  Output Parameter:
. keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out,
  global size N, and global number of MPI processes P, does\:
.vb
  - sorts locally
  - chooses pivots by sorting, in parallel with a bitonic sort, (P-1) pivot suggestions from each process and allgathering a subset of (P-1) of them
  - uses the pivots to repartition the keys with an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes the keys to match the mapout layout
.ve

  If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`.
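
  Example Usage:
  A minimal sketch that sorts with identical input and output layouts; `comm` and `nlocal` are placeholders for an
  existing MPI communicator and the number of keys held on this process\:
.vb
  PetscLayout mapin, mapout;
  PetscInt   *keysin, *keysout;

  PetscCall(PetscLayoutCreate(comm, &mapin));
  PetscCall(PetscLayoutSetLocalSize(mapin, nlocal));
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutCreate(comm, &mapout));
  PetscCall(PetscLayoutSetLocalSize(mapout, nlocal));
  PetscCall(PetscLayoutSetUp(mapout));
  PetscCall(PetscMalloc2(nlocal, &keysin, nlocal, &keysout));
  // ... fill keysin with this process's (unsorted) keys ...
  PetscCall(PetscParallelSortInt(mapin, mapout, keysin, keysout));
  // keysout now holds this process's slice of the globally sorted keys
  PetscCall(PetscFree2(keysin, keysout));
  PetscCall(PetscLayoutDestroy(&mapin));
  PetscCall(PetscLayoutDestroy(&mapout));
.ve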

.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  PetscFunctionBegin;
  PetscAssertPointer(mapin, 1);
  PetscAssertPointer(mapout, 2);
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  if (mapin->n) PetscAssertPointer(keysin, 3);
  if (mapout->n) PetscAssertPointer(keysout, 4);
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
    PetscCall(PetscSortInt(mapout->n, keysout));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
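  /* PetscParallelSortInt_Samplesort() sorts its input array in place, so sort a copy to leave the caller's keysin untouched */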
  if (keysout != keysin) {
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
359