xref: /petsc/src/vec/is/utils/psort.c (revision 0e03b746557e2551025fde0294144c0532d12f68)
1 
2 #include <petsc/private/petscimpl.h>
3 #include <petscis.h> /*I "petscis.h" I*/
4 
5 /* This is the bitonic merge for non-power-of-2 sizes described at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
6 static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
7 {
8   PetscInt       diff;
9   PetscInt       split, mid, partner;
10   PetscErrorCode ierr;
11 
12   PetscFunctionBegin;
13   diff = rankEnd - rankStart;
14   if (diff <= 0) PetscFunctionReturn(0);
15   if (diff == 1) {
16     if (forward) {
17       ierr = PetscSortInt((PetscInt) n, keys);CHKERRQ(ierr);
18     } else {
19       ierr = PetscSortReverseInt((PetscInt) n, keys);CHKERRQ(ierr);
20     }
21     PetscFunctionReturn(0);
22   }
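  /* split becomes the largest power of two strictly less than diff, so each rank in the lower block
   * [rankStart, mid) is paired with a partner in the upper block [mid, rankEnd) whenever that partner rank exists */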
23   split = 1;
24   while (2 * split < diff) split *= 2;
25   mid = rankStart + split;
26   if (rank < mid) {
27     partner = rank + split;
28   } else {
29     partner = rank - split;
30   }
31   if (partner < rankEnd) {
32     PetscMPIInt i;
33 
34     ierr = MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);CHKERRQ(ierr);
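    /* each rank now also holds its partner's block in buffer; for an ascending merge the lower rank keeps the
     * element-wise minima and the higher rank the element-wise maxima, and vice versa for a descending merge */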
35     if ((rank < partner) == (forward == PETSC_TRUE)) {
36       for (i = 0; i < n; i++) {
37         keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
38       }
39     } else {
40       for (i = 0; i < n; i++) {
41         keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
42       }
43     }
44   }
45   /* divide and conquer */
46   if (rank < mid) {
47     ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);CHKERRQ(ierr);
48   } else {
49     ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
50   }
51   PetscFunctionReturn(0);
52 }
53 
54 /* This is the bitonic sort for non-power-of-2 sizes described at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
55 static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
56 {
57   PetscInt       diff;
58   PetscInt       mid;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   diff = rankEnd - rankStart;
63   if (diff <= 0) PetscFunctionReturn(0);
64   if (diff == 1) {
65     if (forward) {
66       ierr = PetscSortInt(n, keys);CHKERRQ(ierr);
67     } else {
68       ierr = PetscSortReverseInt(n, keys);CHKERRQ(ierr);
69     }
70     PetscFunctionReturn(0);
71   }
72   mid = rankStart + diff / 2;
73   /* divide and conquer */
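  /* the lower half of the ranks is sorted in the direction opposite to the requested one and the upper half in the
   * requested direction: together they form a bitonic sequence, which the merge below turns into a sorted one */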
74   if (rank < mid) {
75     ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward);CHKERRQ(ierr);
76   } else {
77     ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
78   }
79   /* bitonic merge */
80   ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
81   PetscFunctionReturn(0);
82 }
83 
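/* Driver for the bitonic sort across the whole communicator: every rank contributes exactly n keys and, on return,
 * holds the n keys that belong at its position in the ascending global order.  Used by PetscParallelSampleSelect()
 * below to sort the pivot candidates. */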
84 static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
85 {
86   PetscMPIInt    size, rank, tag, mpin;
87   PetscInt       *buffer;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   PetscValidIntPointer(keys, 3);
92   ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr);
93   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
94   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
95   ierr = PetscMPIIntCast(n, &mpin);CHKERRQ(ierr);
96   ierr = PetscMalloc1(n, &buffer);CHKERRQ(ierr);
97   ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);CHKERRQ(ierr);
98   ierr = PetscFree(buffer);CHKERRQ(ierr);
99   PetscFunctionReturn(0);
100 }
101 
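/* Choose size - 1 splitting pivots for the samplesort: every rank proposes size - 1 candidates drawn from its
 * locally sorted keys (or "infinity" if it has none), the candidates are sorted across the communicator with the
 * bitonic sort above, size - 1 evenly spaced survivors are selected, and the same pivots are then allgathered to
 * every rank in *outpivots, to be freed by the caller. */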
102 static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
103 {
104   PetscMPIInt    size, rank;
105   PetscInt       *pivots, *finalpivots, i;
106   PetscInt       non_empty, my_first, count;
107   PetscMPIInt    *keys_per, max_keys_per;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
112   ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr);
113 
114   /* Choose P - 1 pivots that would be ideal for the distribution on this process */
115   ierr = PetscMalloc1(size - 1, &pivots);CHKERRQ(ierr);
116   for (i = 0; i < size - 1; i++) {
117     PetscInt index;
118 
119     if (!mapin->n) {
120       /* if this rank has no input keys, put "infinity" in the list.  every process knows how many ranks
121        * of the input layout are empty, so those trailing values can be ignored at the end of the sorted
122        * pivots */
123       pivots[i] = PETSC_MAX_INT;
124     } else {
125       /* match the distribution to the desired output described by mapout by getting the index
126        * that is approximately the appropriate fraction through the list */
127       index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
128       index = PetscMin(index, (mapin->n - 1));
129       index = PetscMax(index, 0);
130       pivots[i] = keysin[index];
131     }
132   }
133   /* sort the pivots in parallel */
134   ierr = PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);CHKERRQ(ierr);
135 #if defined(PETSC_USE_DEBUG)
136   {
137     PetscBool sorted;
138 
139     ierr = PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);CHKERRQ(ierr);
140     if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
141   }
142 #endif
143 
144   /* if there are Z processes with nonempty input, we have (P - 1) * Z real pivots (the empty input ranks
145    * contributed only "infinity"), and we want to select the pivots at indices Z - 1, 2*Z - 1, ..., (P - 1) * Z - 1 */
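  /* (for example, with P = 4 ranks of which Z = 3 have input keys, the parallel sort leaves 9 real pivots followed
   * by 3 "infinity" entries, and we keep the real pivots at global indices 2, 5, and 8) */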
146   non_empty = size;
147   for (i = 0; i < size; i++) if (mapin->range[i] == mapin->range[i+1]) non_empty--;
148   ierr = PetscCalloc1(size + 1, &keys_per);CHKERRQ(ierr);
149   my_first = -1;
150   if (non_empty) {
151     for (i = 0; i < size - 1; i++) {
152       size_t sample = (i + 1) * non_empty - 1;
153       size_t sample_rank = sample / (size - 1);
154 
155       keys_per[sample_rank]++;
156       if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
157         my_first = (PetscInt) (sample - sample_rank * (size - 1));
158       }
159     }
160   }
161   for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
162   ierr = PetscMalloc1(size * max_keys_per, &finalpivots);CHKERRQ(ierr);
163   /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
164    * and allgather them */
165   for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
166   for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
167   ierr = MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);CHKERRQ(ierr);
168   for (i = 0, count = 0; i < size; i++) {
169     PetscInt j;
170 
171     for (j = 0; j < max_keys_per; j++) {
172       if (j < keys_per[i]) {
173         finalpivots[count++] = finalpivots[i * max_keys_per + j];
174       }
175     }
176   }
177   *outpivots = finalpivots;
178   ierr = PetscFree(keys_per);CHKERRQ(ierr);
179   ierr = PetscFree(pivots);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
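/* Move the n entries of arrayin on this rank (part of an array that is already globally sorted in rank order) into
 * arrayout, which is distributed according to map.  Every rank with a nonempty output range posts one receive for
 * that whole range: the rank owning the start of the range sends first (firsttag), and the receiver then drains the
 * remainder from the following ranks in order (secondtag), which is why ranks with nothing to contribute may still
 * have to send empty messages. */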
183 static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
184 {
185   PetscMPIInt    size, rank;
186   PetscInt       myOffset, nextOffset;
187   PetscInt       i;
188   PetscMPIInt    total, filled;
189   PetscMPIInt    sender, nfirst, nsecond;
190   PetscMPIInt    firsttag, secondtag;
191   MPI_Request    firstreqrcv;
192   MPI_Request    *firstreqs;
193   MPI_Request    *secondreqs;
194   MPI_Status     firststatus;
195 
196   PetscErrorCode ierr;
197 
198   PetscFunctionBegin;
199   ierr = MPI_Comm_size(map->comm, &size);CHKERRQ(ierr);
200   ierr = MPI_Comm_rank(map->comm, &rank);CHKERRQ(ierr);
201   ierr = PetscCommGetNewTag(map->comm, &firsttag);CHKERRQ(ierr);
202   ierr = PetscCommGetNewTag(map->comm, &secondtag);CHKERRQ(ierr);
203   myOffset = 0;
204   ierr = PetscMalloc2(size, &firstreqs, size, &secondreqs);CHKERRQ(ierr);
205   ierr = MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);CHKERRQ(ierr);
206   myOffset = nextOffset - n;
207   total = map->range[rank + 1] - map->range[rank];
208   if (total > 0) {
209     ierr = MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);CHKERRQ(ierr);
210   }
211   for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
212     PetscInt itotal;
213     PetscInt overlap, oStart, oEnd;
214 
215     itotal = map->range[i + 1] - map->range[i];
216     if (itotal <= 0) continue;
217     oStart = PetscMax(myOffset, map->range[i]);
218     oEnd   = PetscMin(nextOffset, map->range[i + 1]);
219     overlap = oEnd - oStart;
220     if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
221       /* send first message */
222       ierr = MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));CHKERRQ(ierr);
223     } else if (overlap > 0) {
224       /* send second message */
225       ierr = MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr);
226     } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
227       /* send an empty second message: the receiver drains the rest of its range from consecutive ranks in order, so a rank with no keys whose offset falls strictly inside this output range must still participate */
228       ierr = MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr);
229     }
230   }
231   filled = 0;
232   sender = -1;
233   if (total > 0) {
234     ierr = MPI_Wait(&firstreqrcv, &firststatus);CHKERRQ(ierr);
235     sender = firststatus.MPI_SOURCE;
236     ierr = MPI_Get_count(&firststatus, MPIU_INT, &filled);CHKERRQ(ierr);
237   }
238   while (filled < total) {
239     PetscMPIInt mfilled;
240     MPI_Status stat;
241 
242     sender++;
243     ierr = MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);CHKERRQ(ierr);
244     ierr = MPI_Get_count(&stat, MPIU_INT, &mfilled);CHKERRQ(ierr);
245     filled += mfilled;
246   }
247   ierr = MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
248   ierr = MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
249   ierr = PetscFree2(firstreqs, secondreqs);CHKERRQ(ierr);
250   PetscFunctionReturn(0);
251 }
252 
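/* The samplesort proper: sort locally, pick global pivots with PetscParallelSampleSelect(), bucket the local keys
 * by pivot and exchange the buckets with MPI_Alltoallv(), sort what was received, and finally hand the globally
 * sorted data to PetscParallelRedistribute() so that it matches the requested output layout. */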
253 static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
254 {
255   PetscMPIInt    size, rank;
256   PetscInt       *pivots = NULL, *buffer;
257   PetscInt       i, j;
258   PetscMPIInt    *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;
259   PetscErrorCode ierr;
260 
261   PetscFunctionBegin;
262   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
263   ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr);
264   ierr = PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);CHKERRQ(ierr);
265   /* sort locally */
266   ierr = PetscSortInt(mapin->n, keysin);CHKERRQ(ierr);
267   /* get P - 1 pivots */
268   ierr = PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);CHKERRQ(ierr);
269   /* determine which entries in the sorted array go to which other processes based on the pivots */
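  /* keysin is already locally sorted, so a single forward scan suffices: rank 0 receives the keys below pivots[0],
   * rank i the keys in [pivots[i-1], pivots[i]), and the last rank everything that remains */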
270   for (i = 0, j = 0; i < size - 1; i++) {
271     PetscInt prev = j;
272 
273     while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
274     offsets_snd[i] = prev;
275     keys_per_snd[i] = j - prev;
276   }
277   offsets_snd[size - 1] = j;
278   keys_per_snd[size - 1] = mapin->n - j;
279   offsets_snd[size] = mapin->n;
280   /* get the incoming sizes */
281   ierr = MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);CHKERRQ(ierr);
282   offsets_rcv[0] = 0;
283   for (i = 0; i < size; i++) {
284     offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
285   }
286   nrecv = offsets_rcv[size];
287   /* all to all exchange */
288   ierr = PetscMalloc1(nrecv, &buffer);CHKERRQ(ierr);
289   ierr = MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);CHKERRQ(ierr);
290   ierr = PetscFree(pivots);CHKERRQ(ierr);
291   ierr = PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);CHKERRQ(ierr);
292 
293   /* local sort */
294   ierr = PetscSortInt(nrecv, buffer);CHKERRQ(ierr);
295 #if defined(PETSC_USE_DEBUG)
296   {
297     PetscBool sorted;
298 
299     ierr = PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);CHKERRQ(ierr);
300     if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
301   }
302 #endif
303 
304   /* redistribute to the desired order */
305   ierr = PetscParallelRedistribute(mapout, nrecv, buffer, keysout);CHKERRQ(ierr);
306   ierr = PetscFree(buffer);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 /*@
311   PetscParallelSortInt - Globally sort a distributed array of integers
312 
313   Collective
314 
315   Input Parameters:
316 + mapin - PetscLayout describing the distribution of the input keys
317 . mapout - PetscLayout describing the distribution of the output keys
318 - keysin - the input array of integers to sort (it need not be locally sorted)
319 
320   Output Parameter:
321 . keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.
322 
323   Level: developer
324 
325   Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
326 
327   - sorts locally
328   - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
329   - uses the pivots to repartition the keys with an all-to-all exchange
330   - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
331   - redistributes the keys to match the mapout layout
332 
333   If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
334 
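  Example:
  A minimal calling sketch (the layout construction and the local sizes nin and nout here are illustrative; any two
  layouts on the same communicator with equal global sizes will do, and keysin is assumed to hold the nin local input keys):
.vb
  PetscLayout mapin, mapout;
  PetscInt    *keysout;

  ierr = PetscLayoutCreate(comm, &mapin);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(mapin, nin);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mapin);CHKERRQ(ierr);
  ierr = PetscLayoutCreate(comm, &mapout);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(mapout, nout);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mapout);CHKERRQ(ierr);
  ierr = PetscMalloc1(nout, &keysout);CHKERRQ(ierr);
  ierr = PetscParallelSortInt(mapin, mapout, keysin, keysout);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&mapin);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&mapout);CHKERRQ(ierr);
  ierr = PetscFree(keysout);CHKERRQ(ierr);
.ve
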
335 .seealso: PetscParallelSortedInt()
336 @*/
337 PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
338 {
339   PetscMPIInt    size;
340   PetscMPIInt    result;
341   PetscInt       *keysincopy = NULL;
342   PetscErrorCode ierr;
343 
344   PetscFunctionBegin;
345   PetscValidPointer(mapin, 1);
346   PetscValidPointer(mapout, 2);
347   ierr = MPI_Comm_compare(mapin->comm, mapout->comm, &result);CHKERRQ(ierr);
348   if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
349   ierr = PetscLayoutSetUp(mapin);CHKERRQ(ierr);
350   ierr = PetscLayoutSetUp(mapout);CHKERRQ(ierr);
351   if (mapin->n) PetscValidIntPointer(keysin, 3);
352   if (mapout->n) PetscValidIntPointer(keysout, 4);
353   if (mapin->N != mapout->N) SETERRQ2(mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%D != %D)", mapin->N, mapout->N);
354   ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
355   if (size == 1) {
356     if (keysout != keysin) {
357       ierr = PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
358     }
359     ierr = PetscSortInt(mapout->n, keysout);CHKERRQ(ierr);
360     PetscFunctionReturn(0);
361   }
362   if (keysout != keysin) {
363     ierr = PetscMalloc1(mapin->n, &keysincopy);CHKERRQ(ierr);
364     ierr = PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
365     keysin = keysincopy;
366   }
367   ierr = PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);CHKERRQ(ierr);
368 #if defined(PETSC_USE_DEBUG)
369   {
370     PetscBool sorted;
371 
372     ierr = PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);CHKERRQ(ierr);
373     if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
374   }
375 #endif
376   ierr = PetscFree(keysincopy);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379