1ce605777SToby Isaac #include <petsc/private/petscimpl.h>
2579e05a6SToby Isaac #include <petscis.h> /*I "petscis.h" I*/
3ce605777SToby Isaac
4ce605777SToby Isaac /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt rankStart,PetscMPIInt rankEnd,PetscMPIInt rank,PetscMPIInt n,PetscInt keys[],PetscInt buffer[],PetscBool forward)5d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
6d71ae5a4SJacob Faibussowitsch {
7ce605777SToby Isaac PetscInt diff;
86497c311SBarry Smith PetscMPIInt split, mid, partner;
9ce605777SToby Isaac
10ce605777SToby Isaac PetscFunctionBegin;
11ce605777SToby Isaac diff = rankEnd - rankStart;
123ba16761SJacob Faibussowitsch if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
13ce605777SToby Isaac if (diff == 1) {
14ce605777SToby Isaac if (forward) {
156497c311SBarry Smith PetscCall(PetscSortInt(n, keys));
16ce605777SToby Isaac } else {
176497c311SBarry Smith PetscCall(PetscSortReverseInt(n, keys));
18ce605777SToby Isaac }
193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
20ce605777SToby Isaac }
21ce605777SToby Isaac split = 1;
22ce605777SToby Isaac while (2 * split < diff) split *= 2;
23ce605777SToby Isaac mid = rankStart + split;
24ce605777SToby Isaac if (rank < mid) {
25ce605777SToby Isaac partner = rank + split;
26ce605777SToby Isaac } else {
27ce605777SToby Isaac partner = rank - split;
28ce605777SToby Isaac }
29ce605777SToby Isaac if (partner < rankEnd) {
30ce605777SToby Isaac PetscMPIInt i;
31ce605777SToby Isaac
329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
33ce605777SToby Isaac if ((rank < partner) == (forward == PETSC_TRUE)) {
34ad540459SPierre Jolivet for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
35ce605777SToby Isaac } else {
36ad540459SPierre Jolivet for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
37ce605777SToby Isaac }
38ce605777SToby Isaac }
39ce605777SToby Isaac /* divide and conquer */
40ce605777SToby Isaac if (rank < mid) {
419566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
42ce605777SToby Isaac } else {
439566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
44ce605777SToby Isaac }
453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
46ce605777SToby Isaac }
47ce605777SToby Isaac
48ce605777SToby Isaac /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt rankStart,PetscMPIInt rankEnd,PetscMPIInt rank,PetscMPIInt n,PetscInt keys[],PetscInt buffer[],PetscBool forward)49d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
50d71ae5a4SJacob Faibussowitsch {
516497c311SBarry Smith PetscMPIInt diff, mid;
52ce605777SToby Isaac
53ce605777SToby Isaac PetscFunctionBegin;
54ce605777SToby Isaac diff = rankEnd - rankStart;
553ba16761SJacob Faibussowitsch if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
56ce605777SToby Isaac if (diff == 1) {
57ce605777SToby Isaac if (forward) {
589566063dSJacob Faibussowitsch PetscCall(PetscSortInt(n, keys));
59ce605777SToby Isaac } else {
609566063dSJacob Faibussowitsch PetscCall(PetscSortReverseInt(n, keys));
61ce605777SToby Isaac }
623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
63ce605777SToby Isaac }
64ce605777SToby Isaac mid = rankStart + diff / 2;
65ce605777SToby Isaac /* divide and conquer */
66ce605777SToby Isaac if (rank < mid) {
679566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
68ce605777SToby Isaac } else {
699566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
70ce605777SToby Isaac }
71ce605777SToby Isaac /* bitonic merge */
729566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
74ce605777SToby Isaac }
75ce605777SToby Isaac
PetscParallelSortInt_Bitonic(MPI_Comm comm,PetscInt n,PetscInt keys[])76d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
77d71ae5a4SJacob Faibussowitsch {
78ce605777SToby Isaac PetscMPIInt size, rank, tag, mpin;
79ce605777SToby Isaac PetscInt *buffer;
80ce605777SToby Isaac
81ce605777SToby Isaac PetscFunctionBegin;
824f572ea9SToby Isaac PetscAssertPointer(keys, 3);
839566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(comm, &tag));
849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
869566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(n, &mpin));
879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &buffer));
889566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
899566063dSJacob Faibussowitsch PetscCall(PetscFree(buffer));
903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
91ce605777SToby Isaac }
92ce605777SToby Isaac
PetscParallelSampleSelect(PetscLayout mapin,PetscLayout mapout,PetscInt keysin[],PetscInt * outpivots[])93d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
94d71ae5a4SJacob Faibussowitsch {
95ce605777SToby Isaac PetscMPIInt size, rank;
96ce605777SToby Isaac PetscInt *pivots, *finalpivots, i;
97ce605777SToby Isaac PetscInt non_empty, my_first, count;
98ce605777SToby Isaac PetscMPIInt *keys_per, max_keys_per;
99ce605777SToby Isaac
100ce605777SToby Isaac PetscFunctionBegin;
1019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
1029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
103ce605777SToby Isaac
104ce605777SToby Isaac /* Choose P - 1 pivots that would be ideal for the distribution on this process */
1059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size - 1, &pivots));
106ce605777SToby Isaac for (i = 0; i < size - 1; i++) {
107ce605777SToby Isaac PetscInt index;
108ce605777SToby Isaac
109ce605777SToby Isaac if (!mapin->n) {
110ce605777SToby Isaac /* if this rank is empty, put "infinity" in the list. each process knows how many empty
111ce605777SToby Isaac * processes are in the layout, so those values will be ignored from the end of the sorted
112ce605777SToby Isaac * pivots */
1131690c2aeSBarry Smith pivots[i] = PETSC_INT_MAX;
114ce605777SToby Isaac } else {
115ce605777SToby Isaac /* match the distribution to the desired output described by mapout by getting the index
116ce605777SToby Isaac * that is approximately the appropriate fraction through the list */
117ce605777SToby Isaac index = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
11857508eceSPierre Jolivet index = PetscMin(index, mapin->n - 1);
119ce605777SToby Isaac index = PetscMax(index, 0);
120ce605777SToby Isaac pivots[i] = keysin[index];
121ce605777SToby Isaac }
122ce605777SToby Isaac }
123ce605777SToby Isaac /* sort the pivots in parallel */
1249566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
12576bd3646SJed Brown if (PetscDefined(USE_DEBUG)) {
126ce605777SToby Isaac PetscBool sorted;
127ce605777SToby Isaac
1289566063dSJacob Faibussowitsch PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
12928b400f6SJacob Faibussowitsch PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
130ce605777SToby Isaac }
131ce605777SToby Isaac
132ce605777SToby Isaac /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
133ce605777SToby Isaac * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
134ce605777SToby Isaac non_empty = size;
1359371c9d4SSatish Balay for (i = 0; i < size; i++)
1369371c9d4SSatish Balay if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
1379566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size + 1, &keys_per));
138ce605777SToby Isaac my_first = -1;
139ce605777SToby Isaac if (non_empty) {
140ce605777SToby Isaac for (i = 0; i < size - 1; i++) {
141ce605777SToby Isaac size_t sample = (i + 1) * non_empty - 1;
142ce605777SToby Isaac size_t sample_rank = sample / (size - 1);
143ce605777SToby Isaac
144ce605777SToby Isaac keys_per[sample_rank]++;
145ad540459SPierre Jolivet if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
146ce605777SToby Isaac }
147ce605777SToby Isaac }
148ce605777SToby Isaac for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
1499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
150ce605777SToby Isaac /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
151ce605777SToby Isaac * and allgather them */
152ce605777SToby Isaac for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
1531690c2aeSBarry Smith for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_INT_MAX;
1549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
155ce605777SToby Isaac for (i = 0, count = 0; i < size; i++) {
156ce605777SToby Isaac PetscInt j;
157ce605777SToby Isaac
158ce605777SToby Isaac for (j = 0; j < max_keys_per; j++) {
159ad540459SPierre Jolivet if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
160ce605777SToby Isaac }
161ce605777SToby Isaac }
162ce605777SToby Isaac *outpivots = finalpivots;
1639566063dSJacob Faibussowitsch PetscCall(PetscFree(keys_per));
1649566063dSJacob Faibussowitsch PetscCall(PetscFree(pivots));
1653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
166ce605777SToby Isaac }
167ce605777SToby Isaac
PetscParallelRedistribute(PetscLayout map,PetscInt n,PetscInt arrayin[],PetscInt arrayout[])168d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
169d71ae5a4SJacob Faibussowitsch {
170ce605777SToby Isaac PetscMPIInt size, rank;
171ce605777SToby Isaac PetscInt myOffset, nextOffset;
172dd460d27SBarry Smith PetscCount total;
173dd460d27SBarry Smith PetscMPIInt nfirst, nsecond;
174ce605777SToby Isaac PetscMPIInt firsttag, secondtag;
175ce605777SToby Isaac MPI_Request firstreqrcv;
176ce605777SToby Isaac MPI_Request *firstreqs;
177ce605777SToby Isaac MPI_Request *secondreqs;
178ce605777SToby Isaac
179ce605777SToby Isaac PetscFunctionBegin;
1809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(map->comm, &size));
1819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
1829566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
1839566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
1849566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
1859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
186ce605777SToby Isaac myOffset = nextOffset - n;
187ce605777SToby Isaac total = map->range[rank + 1] - map->range[rank];
1886497c311SBarry Smith if (total > 0) PetscCallMPI(MPIU_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
1896497c311SBarry Smith nsecond = 0;
1906497c311SBarry Smith nfirst = 0;
1916497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
192ce605777SToby Isaac PetscInt itotal;
193ce605777SToby Isaac PetscInt overlap, oStart, oEnd;
194ce605777SToby Isaac
195ce605777SToby Isaac itotal = map->range[i + 1] - map->range[i];
196ce605777SToby Isaac if (itotal <= 0) continue;
197ce605777SToby Isaac oStart = PetscMax(myOffset, map->range[i]);
198ce605777SToby Isaac oEnd = PetscMin(nextOffset, map->range[i + 1]);
199ce605777SToby Isaac overlap = oEnd - oStart;
200ce605777SToby Isaac if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
201ce605777SToby Isaac /* send first message */
2026497c311SBarry Smith PetscCallMPI(MPIU_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &firstreqs[nfirst++]));
203ce605777SToby Isaac } else if (overlap > 0) {
204ce605777SToby Isaac /* send second message */
2056497c311SBarry Smith PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++]));
206ce605777SToby Isaac } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
207ce605777SToby Isaac /* send empty second message */
2086497c311SBarry Smith PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++]));
209ce605777SToby Isaac }
210ce605777SToby Isaac }
211ce605777SToby Isaac if (total > 0) {
2126497c311SBarry Smith MPI_Status status;
213dd460d27SBarry Smith PetscMPIInt sender = -1;
214dd460d27SBarry Smith PetscCount filled = 0;
2156497c311SBarry Smith
2166497c311SBarry Smith PetscCallMPI(MPI_Wait(&firstreqrcv, &status));
2176497c311SBarry Smith sender = status.MPI_SOURCE;
2186497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &filled));
219ce605777SToby Isaac while (filled < total) {
2206497c311SBarry Smith PetscCount mfilled;
221ce605777SToby Isaac
222ce605777SToby Isaac sender++;
2236497c311SBarry Smith PetscCallMPI(MPIU_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &status));
2246497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &mfilled));
225ce605777SToby Isaac filled += mfilled;
226ce605777SToby Isaac }
2276497c311SBarry Smith }
2289566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
2299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
2309566063dSJacob Faibussowitsch PetscCall(PetscFree2(firstreqs, secondreqs));
2313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
232ce605777SToby Isaac }
233ce605777SToby Isaac
PetscParallelSortInt_Samplesort(PetscLayout mapin,PetscLayout mapout,PetscInt keysin[],PetscInt keysout[])234d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
235d71ae5a4SJacob Faibussowitsch {
236ce605777SToby Isaac PetscMPIInt size, rank;
2376df2664eSToby Isaac PetscInt *pivots = NULL, *buffer;
2386497c311SBarry Smith PetscInt j;
239ce605777SToby Isaac PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;
240ce605777SToby Isaac
241ce605777SToby Isaac PetscFunctionBegin;
2429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
2439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
2449566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
245ce605777SToby Isaac /* sort locally */
2469566063dSJacob Faibussowitsch PetscCall(PetscSortInt(mapin->n, keysin));
247ce605777SToby Isaac /* get P - 1 pivots */
2489566063dSJacob Faibussowitsch PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
249ce605777SToby Isaac /* determine which entries in the sorted array go to which other processes based on the pivots */
2506497c311SBarry Smith j = 0;
2516497c311SBarry Smith for (PetscMPIInt i = 0; i < size - 1; i++) {
252ce605777SToby Isaac PetscInt prev = j;
253ce605777SToby Isaac
254ce605777SToby Isaac while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
2556497c311SBarry Smith PetscCall(PetscMPIIntCast(prev, &offsets_snd[i]));
2566497c311SBarry Smith PetscCall(PetscMPIIntCast(j - prev, &keys_per_snd[i]));
257ce605777SToby Isaac }
2586497c311SBarry Smith PetscCall(PetscMPIIntCast(j, &offsets_snd[size - 1]));
2596497c311SBarry Smith PetscCall(PetscMPIIntCast(mapin->n - j, &keys_per_snd[size - 1]));
2606497c311SBarry Smith PetscCall(PetscMPIIntCast(mapin->n, &offsets_snd[size]));
261ce605777SToby Isaac /* get the incoming sizes */
2629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
263ce605777SToby Isaac offsets_rcv[0] = 0;
2646497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
265ce605777SToby Isaac nrecv = offsets_rcv[size];
266ce605777SToby Isaac /* all to all exchange */
2679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrecv, &buffer));
2689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
2699566063dSJacob Faibussowitsch PetscCall(PetscFree(pivots));
2709566063dSJacob Faibussowitsch PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));
271ce605777SToby Isaac
272ce605777SToby Isaac /* local sort */
2739566063dSJacob Faibussowitsch PetscCall(PetscSortInt(nrecv, buffer));
274ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
275ce605777SToby Isaac {
276ce605777SToby Isaac PetscBool sorted;
277ce605777SToby Isaac
2789566063dSJacob Faibussowitsch PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
27928b400f6SJacob Faibussowitsch PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
280ce605777SToby Isaac }
281ce605777SToby Isaac #endif
282ce605777SToby Isaac
283ce605777SToby Isaac /* redistribute to the desired order */
2849566063dSJacob Faibussowitsch PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
2859566063dSJacob Faibussowitsch PetscCall(PetscFree(buffer));
2863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
287ce605777SToby Isaac }
288ce605777SToby Isaac
289ce605777SToby Isaac /*@
290064ec1f3Sprj- PetscParallelSortInt - Globally sort a distributed array of integers
291ce605777SToby Isaac
292ce605777SToby Isaac Collective
293ce605777SToby Isaac
294ce605777SToby Isaac Input Parameters:
295cab54364SBarry Smith + mapin - `PetscLayout` describing the distribution of the input keys
296cab54364SBarry Smith . mapout - `PetscLayout` describing the desired distribution of the output keys
297ce605777SToby Isaac - keysin - the pre-sorted array of integers
298ce605777SToby Isaac
299064ec1f3Sprj- Output Parameter:
3006497c311SBarry Smith . keysout - the array in which the sorted integers will be stored. If `mapin` == `mapout`, then `keysin` may be equal to `keysout`.
301ce605777SToby Isaac
302ce605777SToby Isaac Level: developer
303ce605777SToby Isaac
304cab54364SBarry Smith Notes:
30538b5cf2dSJacob Faibussowitsch
306e33f79d8SJacob Faibussowitsch This implements a distributed samplesort, which, with local array sizes n_in and n_out,
307e33f79d8SJacob Faibussowitsch global size N, and global number of MPI processes P, does\:
308cab54364SBarry Smith .vb
309ce605777SToby Isaac - sorts locally
310ce605777SToby Isaac - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
311ce605777SToby Isaac - using to the pivots to repartition the keys by all-to-all exchange
312ce605777SToby Isaac - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
313ce605777SToby Isaac - redistributing to match the mapout layout
314cab54364SBarry Smith .ve
315ce605777SToby Isaac
316e33f79d8SJacob Faibussowitsch If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`.
317ce605777SToby Isaac
318cab54364SBarry Smith .seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
319ce605777SToby Isaac @*/
PetscParallelSortInt(PetscLayout mapin,PetscLayout mapout,PetscInt keysin[],PetscInt keysout[])320d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
321d71ae5a4SJacob Faibussowitsch {
322ce605777SToby Isaac PetscMPIInt size;
323ce605777SToby Isaac PetscMPIInt result;
324ce605777SToby Isaac PetscInt *keysincopy = NULL;
325ce605777SToby Isaac
326ce605777SToby Isaac PetscFunctionBegin;
3274f572ea9SToby Isaac PetscAssertPointer(mapin, 1);
3284f572ea9SToby Isaac PetscAssertPointer(mapout, 2);
3299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
330c9cc58a2SBarry Smith PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
3319566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mapin));
3329566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mapout));
3334f572ea9SToby Isaac if (mapin->n) PetscAssertPointer(keysin, 3);
3344f572ea9SToby Isaac if (mapout->n) PetscAssertPointer(keysout, 4);
33508401ef6SPierre Jolivet PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
3369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
337ce605777SToby Isaac if (size == 1) {
338*418fb43bSPierre Jolivet if (keysout != keysin) PetscCall(PetscArraycpy(keysout, keysin, mapin->n));
3399566063dSJacob Faibussowitsch PetscCall(PetscSortInt(mapout->n, keysout));
3403ba16761SJacob Faibussowitsch if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
341ce605777SToby Isaac }
342ce605777SToby Isaac if (keysout != keysin) {
3439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mapin->n, &keysincopy));
344*418fb43bSPierre Jolivet PetscCall(PetscArraycpy(keysincopy, keysin, mapin->n));
345ce605777SToby Isaac keysin = keysincopy;
346ce605777SToby Isaac }
3479566063dSJacob Faibussowitsch PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
348ce605777SToby Isaac #if defined(PETSC_USE_DEBUG)
349ce605777SToby Isaac {
350ce605777SToby Isaac PetscBool sorted;
351ce605777SToby Isaac
3529566063dSJacob Faibussowitsch PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
35328b400f6SJacob Faibussowitsch PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
354ce605777SToby Isaac }
355ce605777SToby Isaac #endif
3569566063dSJacob Faibussowitsch PetscCall(PetscFree(keysincopy));
3573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
358ce605777SToby Isaac }
359