1 #include <petsc/private/petscimpl.h> 2 #include <petscis.h> /*I "petscis.h" I*/ 3 4 /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 5 static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 6 { 7 PetscInt diff; 8 PetscMPIInt split, mid, partner; 9 10 PetscFunctionBegin; 11 diff = rankEnd - rankStart; 12 if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS); 13 if (diff == 1) { 14 if (forward) { 15 PetscCall(PetscSortInt(n, keys)); 16 } else { 17 PetscCall(PetscSortReverseInt(n, keys)); 18 } 19 PetscFunctionReturn(PETSC_SUCCESS); 20 } 21 split = 1; 22 while (2 * split < diff) split *= 2; 23 mid = rankStart + split; 24 if (rank < mid) { 25 partner = rank + split; 26 } else { 27 partner = rank - split; 28 } 29 if (partner < rankEnd) { 30 PetscMPIInt i; 31 32 PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE)); 33 if ((rank < partner) == (forward == PETSC_TRUE)) { 34 for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i]; 35 } else { 36 for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i]; 37 } 38 } 39 /* divide and conquer */ 40 if (rank < mid) { 41 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward)); 42 } else { 43 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 44 } 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 49 static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 50 { 51 PetscMPIInt diff, mid; 52 53 PetscFunctionBegin; 54 diff = rankEnd - rankStart; 55 if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS); 56 if (diff == 1) { 57 if (forward) { 58 PetscCall(PetscSortInt(n, keys)); 59 } else { 60 PetscCall(PetscSortReverseInt(n, keys)); 61 } 62 PetscFunctionReturn(PETSC_SUCCESS); 63 } 64 mid = rankStart + diff / 2; 65 /* divide and conquer */ 66 if (rank < mid) { 67 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward)); 68 } else { 69 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 70 } 71 /* bitonic merge */ 72 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward)); 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[]) 77 { 78 PetscMPIInt size, rank, tag, mpin; 79 PetscInt *buffer; 80 81 PetscFunctionBegin; 82 PetscAssertPointer(keys, 3); 83 PetscCall(PetscCommGetNewTag(comm, &tag)); 84 PetscCallMPI(MPI_Comm_size(comm, &size)); 85 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 86 PetscCall(PetscMPIIntCast(n, &mpin)); 87 PetscCall(PetscMalloc1(n, &buffer)); 88 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE)); 89 PetscCall(PetscFree(buffer)); 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 93 static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[]) 94 { 95 PetscMPIInt size, rank; 96 PetscInt *pivots, *finalpivots, i; 97 PetscInt non_empty, my_first, count; 98 PetscMPIInt *keys_per, max_keys_per; 99 100 PetscFunctionBegin; 101 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 102 PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 103 104 /* Choose P - 1 pivots that would be ideal for the distribution on this process */ 105 PetscCall(PetscMalloc1(size - 1, &pivots)); 106 for (i = 0; i < size - 1; i++) { 107 PetscInt index; 108 109 if (!mapin->n) { 110 /* if this rank is empty, put "infinity" in the list. each process knows how many empty 111 * processes are in the layout, so those values will be ignored from the end of the sorted 112 * pivots */ 113 pivots[i] = PETSC_MAX_INT; 114 } else { 115 /* match the distribution to the desired output described by mapout by getting the index 116 * that is approximately the appropriate fraction through the list */ 117 index = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N); 118 index = PetscMin(index, (mapin->n - 1)); 119 index = PetscMax(index, 0); 120 pivots[i] = keysin[index]; 121 } 122 } 123 /* sort the pivots in parallel */ 124 PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots)); 125 if (PetscDefined(USE_DEBUG)) { 126 PetscBool sorted; 127 128 PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted)); 129 PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed"); 130 } 131 132 /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select 133 * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */ 134 non_empty = size; 135 for (i = 0; i < size; i++) 136 if (mapout->range[i] == mapout->range[i + 1]) non_empty--; 137 PetscCall(PetscCalloc1(size + 1, &keys_per)); 138 my_first = -1; 139 if (non_empty) { 140 for (i = 0; i < size - 1; i++) { 141 size_t sample = (i + 1) * non_empty - 1; 142 size_t sample_rank = sample / (size - 1); 143 144 keys_per[sample_rank]++; 145 if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1)); 146 } 147 } 148 for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per); 149 PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots)); 150 /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array 151 * and allgather them */ 152 for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty]; 153 for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT; 154 PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm)); 155 for (i = 0, count = 0; i < size; i++) { 156 PetscInt j; 157 158 for (j = 0; j < max_keys_per; j++) { 159 if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j]; 160 } 161 } 162 *outpivots = finalpivots; 163 PetscCall(PetscFree(keys_per)); 164 PetscCall(PetscFree(pivots)); 165 PetscFunctionReturn(PETSC_SUCCESS); 166 } 167 168 static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[]) 169 { 170 PetscMPIInt size, rank; 171 PetscInt myOffset, nextOffset; 172 PetscCount total, filled; 173 PetscMPIInt sender, nfirst, nsecond; 174 PetscMPIInt firsttag, secondtag; 175 MPI_Request firstreqrcv; 176 MPI_Request *firstreqs; 177 MPI_Request *secondreqs; 178 179 PetscFunctionBegin; 180 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 181 PetscCallMPI(MPI_Comm_rank(map->comm, &rank)); 182 PetscCall(PetscCommGetNewTag(map->comm, &firsttag)); 183 PetscCall(PetscCommGetNewTag(map->comm, &secondtag)); 184 myOffset = 0; 185 PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs)); 186 PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm)); 187 myOffset = nextOffset - n; 188 total = map->range[rank + 1] - map->range[rank]; 189 if (total > 0) PetscCallMPI(MPIU_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv)); 190 nsecond = 0; 191 nfirst = 0; 192 for (PetscMPIInt i = 0; i < size; i++) { 193 PetscInt itotal; 194 PetscInt overlap, oStart, oEnd; 195 196 itotal = map->range[i + 1] - map->range[i]; 197 if (itotal <= 0) continue; 198 oStart = PetscMax(myOffset, map->range[i]); 199 oEnd = PetscMin(nextOffset, map->range[i + 1]); 200 overlap = oEnd - oStart; 201 if (map->range[i] >= myOffset && map->range[i] < nextOffset) { 202 /* send first message */ 203 PetscCallMPI(MPIU_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &firstreqs[nfirst++])); 204 } else if (overlap > 0) { 205 /* send second message */ 206 PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++])); 207 } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) { 208 /* send empty second message */ 209 PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++])); 210 } 211 } 212 filled = 0; 213 sender = -1; 214 if (total > 0) { 215 MPI_Status status; 216 217 PetscCallMPI(MPI_Wait(&firstreqrcv, &status)); 218 sender = status.MPI_SOURCE; 219 PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &filled)); 220 while (filled < total) { 221 PetscCount mfilled; 222 223 sender++; 224 PetscCallMPI(MPIU_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &status)); 225 PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &mfilled)); 226 filled += mfilled; 227 } 228 } 229 PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE)); 230 PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE)); 231 PetscCall(PetscFree2(firstreqs, secondreqs)); 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 236 { 237 PetscMPIInt size, rank; 238 PetscInt *pivots = NULL, *buffer; 239 PetscInt j; 240 PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv; 241 242 PetscFunctionBegin; 243 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 244 PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 245 PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv)); 246 /* sort locally */ 247 PetscCall(PetscSortInt(mapin->n, keysin)); 248 /* get P - 1 pivots */ 249 PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots)); 250 /* determine which entries in the sorted array go to which other processes based on the pivots */ 251 j = 0; 252 for (PetscMPIInt i = 0; i < size - 1; i++) { 253 PetscInt prev = j; 254 255 while ((j < mapin->n) && (keysin[j] < pivots[i])) j++; 256 PetscCall(PetscMPIIntCast(prev, &offsets_snd[i])); 257 PetscCall(PetscMPIIntCast(j - prev, &keys_per_snd[i])); 258 } 259 PetscCall(PetscMPIIntCast(j, &offsets_snd[size - 1])); 260 PetscCall(PetscMPIIntCast(mapin->n - j, &keys_per_snd[size - 1])); 261 PetscCall(PetscMPIIntCast(mapin->n, &offsets_snd[size])); 262 /* get the incoming sizes */ 263 PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm)); 264 offsets_rcv[0] = 0; 265 for (PetscMPIInt i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i]; 266 nrecv = offsets_rcv[size]; 267 /* all to all exchange */ 268 PetscCall(PetscMalloc1(nrecv, &buffer)); 269 PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm)); 270 PetscCall(PetscFree(pivots)); 271 PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv)); 272 273 /* local sort */ 274 PetscCall(PetscSortInt(nrecv, buffer)); 275 #if defined(PETSC_USE_DEBUG) 276 { 277 PetscBool sorted; 278 279 PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted)); 280 PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed"); 281 } 282 #endif 283 284 /* redistribute to the desired order */ 285 PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout)); 286 PetscCall(PetscFree(buffer)); 287 PetscFunctionReturn(PETSC_SUCCESS); 288 } 289 290 /*@ 291 PetscParallelSortInt - Globally sort a distributed array of integers 292 293 Collective 294 295 Input Parameters: 296 + mapin - `PetscLayout` describing the distribution of the input keys 297 . mapout - `PetscLayout` describing the desired distribution of the output keys 298 - keysin - the pre-sorted array of integers 299 300 Output Parameter: 301 . keysout - the array in which the sorted integers will be stored. If `mapin` == `mapout`, then `keysin` may be equal to `keysout`. 302 303 Level: developer 304 305 Notes: 306 307 This implements a distributed samplesort, which, with local array sizes n_in and n_out, 308 global size N, and global number of MPI processes P, does\: 309 .vb 310 - sorts locally 311 - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those 312 - using to the pivots to repartition the keys by all-to-all exchange 313 - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout) 314 - redistributing to match the mapout layout 315 .ve 316 317 If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`. 318 319 .seealso: `PetscSortInt()`, `PetscParallelSortedInt()` 320 @*/ 321 PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 322 { 323 PetscMPIInt size; 324 PetscMPIInt result; 325 PetscInt *keysincopy = NULL; 326 327 PetscFunctionBegin; 328 PetscAssertPointer(mapin, 1); 329 PetscAssertPointer(mapout, 2); 330 PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result)); 331 PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator"); 332 PetscCall(PetscLayoutSetUp(mapin)); 333 PetscCall(PetscLayoutSetUp(mapout)); 334 if (mapin->n) PetscAssertPointer(keysin, 3); 335 if (mapout->n) PetscAssertPointer(keysout, 4); 336 PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N); 337 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 338 if (size == 1) { 339 if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt))); 340 PetscCall(PetscSortInt(mapout->n, keysout)); 341 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 if (keysout != keysin) { 344 PetscCall(PetscMalloc1(mapin->n, &keysincopy)); 345 PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt))); 346 keysin = keysincopy; 347 } 348 PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout)); 349 #if defined(PETSC_USE_DEBUG) 350 { 351 PetscBool sorted; 352 353 PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted)); 354 PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed"); 355 } 356 #endif 357 PetscCall(PetscFree(keysincopy)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360