1 2 #include <petsc/private/petscimpl.h> 3 #include <petscis.h> /*I "petscis.h" I*/ 4 5 /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 6 static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) { 7 PetscInt diff; 8 PetscInt split, mid, partner; 9 10 PetscFunctionBegin; 11 diff = rankEnd - rankStart; 12 if (diff <= 0) PetscFunctionReturn(0); 13 if (diff == 1) { 14 if (forward) { 15 PetscCall(PetscSortInt((PetscInt)n, keys)); 16 } else { 17 PetscCall(PetscSortReverseInt((PetscInt)n, keys)); 18 } 19 PetscFunctionReturn(0); 20 } 21 split = 1; 22 while (2 * split < diff) split *= 2; 23 mid = rankStart + split; 24 if (rank < mid) { 25 partner = rank + split; 26 } else { 27 partner = rank - split; 28 } 29 if (partner < rankEnd) { 30 PetscMPIInt i; 31 32 PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE)); 33 if ((rank < partner) == (forward == PETSC_TRUE)) { 34 for (i = 0; i < n; i++) { keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i]; } 35 } else { 36 for (i = 0; i < n; i++) { keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i]; } 37 } 38 } 39 /* divide and conquer */ 40 if (rank < mid) { 41 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward)); 42 } else { 43 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 44 } 45 PetscFunctionReturn(0); 46 } 47 48 /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 49 static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) { 50 PetscInt diff; 51 PetscInt mid; 52 53 PetscFunctionBegin; 54 diff = rankEnd - rankStart; 55 if (diff <= 0) PetscFunctionReturn(0); 56 if (diff == 1) { 57 if (forward) { 58 PetscCall(PetscSortInt(n, keys)); 59 } else { 60 PetscCall(PetscSortReverseInt(n, keys)); 61 } 62 PetscFunctionReturn(0); 63 } 64 mid = rankStart + diff / 2; 65 /* divide and conquer */ 66 if (rank < mid) { 67 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward)); 68 } else { 69 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 70 } 71 /* bitonic merge */ 72 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward)); 73 PetscFunctionReturn(0); 74 } 75 76 static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[]) { 77 PetscMPIInt size, rank, tag, mpin; 78 PetscInt *buffer; 79 80 PetscFunctionBegin; 81 PetscValidIntPointer(keys, 3); 82 PetscCall(PetscCommGetNewTag(comm, &tag)); 83 PetscCallMPI(MPI_Comm_size(comm, &size)); 84 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 85 PetscCall(PetscMPIIntCast(n, &mpin)); 86 PetscCall(PetscMalloc1(n, &buffer)); 87 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE)); 88 PetscCall(PetscFree(buffer)); 89 PetscFunctionReturn(0); 90 } 91 92 static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[]) { 93 PetscMPIInt size, rank; 94 PetscInt *pivots, *finalpivots, i; 95 PetscInt non_empty, my_first, count; 96 PetscMPIInt *keys_per, max_keys_per; 97 98 PetscFunctionBegin; 99 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 100 PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 101 102 /* Choose P - 1 pivots that would be ideal for the distribution on this process */ 103 PetscCall(PetscMalloc1(size - 1, &pivots)); 104 for (i = 0; i < size - 1; i++) { 105 PetscInt index; 106 107 if (!mapin->n) { 108 /* if this rank is empty, put "infinity" in the list. each process knows how many empty 109 * processes are in the layout, so those values will be ignored from the end of the sorted 110 * pivots */ 111 pivots[i] = PETSC_MAX_INT; 112 } else { 113 /* match the distribution to the desired output described by mapout by getting the index 114 * that is approximately the appropriate fraction through the list */ 115 index = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N); 116 index = PetscMin(index, (mapin->n - 1)); 117 index = PetscMax(index, 0); 118 pivots[i] = keysin[index]; 119 } 120 } 121 /* sort the pivots in parallel */ 122 PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots)); 123 if (PetscDefined(USE_DEBUG)) { 124 PetscBool sorted; 125 126 PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted)); 127 PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed"); 128 } 129 130 /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select 131 * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */ 132 non_empty = size; 133 for (i = 0; i < size; i++) 134 if (mapout->range[i] == mapout->range[i + 1]) non_empty--; 135 PetscCall(PetscCalloc1(size + 1, &keys_per)); 136 my_first = -1; 137 if (non_empty) { 138 for (i = 0; i < size - 1; i++) { 139 size_t sample = (i + 1) * non_empty - 1; 140 size_t sample_rank = sample / (size - 1); 141 142 keys_per[sample_rank]++; 143 if (my_first < 0 && (PetscMPIInt)sample_rank == rank) { my_first = (PetscInt)(sample - sample_rank * (size - 1)); } 144 } 145 } 146 for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per); 147 PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots)); 148 /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array 149 * and allgather them */ 150 for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty]; 151 for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT; 152 PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm)); 153 for (i = 0, count = 0; i < size; i++) { 154 PetscInt j; 155 156 for (j = 0; j < max_keys_per; j++) { 157 if (j < keys_per[i]) { finalpivots[count++] = finalpivots[i * max_keys_per + j]; } 158 } 159 } 160 *outpivots = finalpivots; 161 PetscCall(PetscFree(keys_per)); 162 PetscCall(PetscFree(pivots)); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[]) { 167 PetscMPIInt size, rank; 168 PetscInt myOffset, nextOffset; 169 PetscInt i; 170 PetscMPIInt total, filled; 171 PetscMPIInt sender, nfirst, nsecond; 172 PetscMPIInt firsttag, secondtag; 173 MPI_Request firstreqrcv; 174 MPI_Request *firstreqs; 175 MPI_Request *secondreqs; 176 MPI_Status firststatus; 177 178 PetscFunctionBegin; 179 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 180 PetscCallMPI(MPI_Comm_rank(map->comm, &rank)); 181 PetscCall(PetscCommGetNewTag(map->comm, &firsttag)); 182 PetscCall(PetscCommGetNewTag(map->comm, &secondtag)); 183 myOffset = 0; 184 PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs)); 185 PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm)); 186 myOffset = nextOffset - n; 187 total = map->range[rank + 1] - map->range[rank]; 188 if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv)); 189 for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) { 190 PetscInt itotal; 191 PetscInt overlap, oStart, oEnd; 192 193 itotal = map->range[i + 1] - map->range[i]; 194 if (itotal <= 0) continue; 195 oStart = PetscMax(myOffset, map->range[i]); 196 oEnd = PetscMin(nextOffset, map->range[i + 1]); 197 overlap = oEnd - oStart; 198 if (map->range[i] >= myOffset && map->range[i] < nextOffset) { 199 /* send first message */ 200 PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]))); 201 } else if (overlap > 0) { 202 /* send second message */ 203 PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]))); 204 } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) { 205 /* send empty second message */ 206 PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]))); 207 } 208 } 209 filled = 0; 210 sender = -1; 211 if (total > 0) { 212 PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus)); 213 sender = firststatus.MPI_SOURCE; 214 PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled)); 215 } 216 while (filled < total) { 217 PetscMPIInt mfilled; 218 MPI_Status stat; 219 220 sender++; 221 PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat)); 222 PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled)); 223 filled += mfilled; 224 } 225 PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE)); 226 PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE)); 227 PetscCall(PetscFree2(firstreqs, secondreqs)); 228 PetscFunctionReturn(0); 229 } 230 231 static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) { 232 PetscMPIInt size, rank; 233 PetscInt *pivots = NULL, *buffer; 234 PetscInt i, j; 235 PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv; 236 237 PetscFunctionBegin; 238 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 239 PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 240 PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv)); 241 /* sort locally */ 242 PetscCall(PetscSortInt(mapin->n, keysin)); 243 /* get P - 1 pivots */ 244 PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots)); 245 /* determine which entries in the sorted array go to which other processes based on the pivots */ 246 for (i = 0, j = 0; i < size - 1; i++) { 247 PetscInt prev = j; 248 249 while ((j < mapin->n) && (keysin[j] < pivots[i])) j++; 250 offsets_snd[i] = prev; 251 keys_per_snd[i] = j - prev; 252 } 253 offsets_snd[size - 1] = j; 254 keys_per_snd[size - 1] = mapin->n - j; 255 offsets_snd[size] = mapin->n; 256 /* get the incoming sizes */ 257 PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm)); 258 offsets_rcv[0] = 0; 259 for (i = 0; i < size; i++) { offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i]; } 260 nrecv = offsets_rcv[size]; 261 /* all to all exchange */ 262 PetscCall(PetscMalloc1(nrecv, &buffer)); 263 PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm)); 264 PetscCall(PetscFree(pivots)); 265 PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv)); 266 267 /* local sort */ 268 PetscCall(PetscSortInt(nrecv, buffer)); 269 #if defined(PETSC_USE_DEBUG) 270 { 271 PetscBool sorted; 272 273 PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted)); 274 PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed"); 275 } 276 #endif 277 278 /* redistribute to the desired order */ 279 PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout)); 280 PetscCall(PetscFree(buffer)); 281 PetscFunctionReturn(0); 282 } 283 284 /*@ 285 PetscParallelSortInt - Globally sort a distributed array of integers 286 287 Collective 288 289 Input Parameters: 290 + mapin - PetscLayout describing the distribution of the input keys 291 . mapout - PetscLayout describing the distribution of the output keys 292 - keysin - the pre-sorted array of integers 293 294 Output Parameter: 295 . keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout. 296 297 Level: developer 298 299 Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does: 300 301 - sorts locally 302 - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those 303 - using to the pivots to repartition the keys by all-to-all exchange 304 - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout) 305 - redistributing to match the mapout layout 306 307 If keysin != keysout, then keysin will not be changed during PetscParallelSortInt. 308 309 .seealso: `PetscParallelSortedInt()` 310 @*/ 311 PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) { 312 PetscMPIInt size; 313 PetscMPIInt result; 314 PetscInt *keysincopy = NULL; 315 316 PetscFunctionBegin; 317 PetscValidPointer(mapin, 1); 318 PetscValidPointer(mapout, 2); 319 PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result)); 320 PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator"); 321 PetscCall(PetscLayoutSetUp(mapin)); 322 PetscCall(PetscLayoutSetUp(mapout)); 323 if (mapin->n) PetscValidIntPointer(keysin, 3); 324 if (mapout->n) PetscValidIntPointer(keysout, 4); 325 PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N); 326 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 327 if (size == 1) { 328 if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt))); 329 PetscCall(PetscSortInt(mapout->n, keysout)); 330 if (size == 1) PetscFunctionReturn(0); 331 } 332 if (keysout != keysin) { 333 PetscCall(PetscMalloc1(mapin->n, &keysincopy)); 334 PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt))); 335 keysin = keysincopy; 336 } 337 PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout)); 338 #if defined(PETSC_USE_DEBUG) 339 { 340 PetscBool sorted; 341 342 PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted)); 343 PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed"); 344 } 345 #endif 346 PetscCall(PetscFree(keysincopy)); 347 PetscFunctionReturn(0); 348 } 349