1 #include <petsc/private/petscimpl.h> 2 #include <petscis.h> /*I "petscis.h" I*/ 3 4 /* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 5 static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 6 { 7 PetscInt diff; 8 PetscMPIInt split, mid, partner; 9 10 PetscFunctionBegin; 11 diff = rankEnd - rankStart; 12 if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS); 13 if (diff == 1) { 14 if (forward) { 15 PetscCall(PetscSortInt(n, keys)); 16 } else { 17 PetscCall(PetscSortReverseInt(n, keys)); 18 } 19 PetscFunctionReturn(PETSC_SUCCESS); 20 } 21 split = 1; 22 while (2 * split < diff) split *= 2; 23 mid = rankStart + split; 24 if (rank < mid) { 25 partner = rank + split; 26 } else { 27 partner = rank - split; 28 } 29 if (partner < rankEnd) { 30 PetscMPIInt i; 31 32 PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE)); 33 if ((rank < partner) == (forward == PETSC_TRUE)) { 34 for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i]; 35 } else { 36 for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i]; 37 } 38 } 39 /* divide and conquer */ 40 if (rank < mid) { 41 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward)); 42 } else { 43 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 44 } 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */ 49 static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward) 50 { 51 PetscMPIInt diff, mid; 52 53 PetscFunctionBegin; 54 diff = rankEnd - rankStart; 55 if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS); 56 if (diff == 1) { 57 if (forward) { 58 PetscCall(PetscSortInt(n, keys)); 59 } else { 60 PetscCall(PetscSortReverseInt(n, keys)); 61 } 62 PetscFunctionReturn(PETSC_SUCCESS); 63 } 64 mid = rankStart + diff / 2; 65 /* divide and conquer */ 66 if (rank < mid) { 67 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward)); 68 } else { 69 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward)); 70 } 71 /* bitonic merge */ 72 PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward)); 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[]) 77 { 78 PetscMPIInt size, rank, tag, mpin; 79 PetscInt *buffer; 80 81 PetscFunctionBegin; 82 PetscAssertPointer(keys, 3); 83 PetscCall(PetscCommGetNewTag(comm, &tag)); 84 PetscCallMPI(MPI_Comm_size(comm, &size)); 85 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 86 PetscCall(PetscMPIIntCast(n, &mpin)); 87 PetscCall(PetscMalloc1(n, &buffer)); 88 PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE)); 89 PetscCall(PetscFree(buffer)); 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 93 static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[]) 94 { 95 PetscMPIInt size, rank; 96 PetscInt *pivots, *finalpivots, i; 97 PetscInt non_empty, my_first, count; 98 PetscMPIInt *keys_per, max_keys_per; 99 100 PetscFunctionBegin; 101 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 102 PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 103 104 /* Choose P - 1 pivots that would be ideal for the distribution on this process */ 105 PetscCall(PetscMalloc1(size - 1, &pivots)); 106 for (i = 0; i < size - 1; i++) { 107 PetscInt index; 108 109 if (!mapin->n) { 110 /* if this rank is empty, put "infinity" in the list. each process knows how many empty 111 * processes are in the layout, so those values will be ignored from the end of the sorted 112 * pivots */ 113 pivots[i] = PETSC_INT_MAX; 114 } else { 115 /* match the distribution to the desired output described by mapout by getting the index 116 * that is approximately the appropriate fraction through the list */ 117 index = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N); 118 index = PetscMin(index, mapin->n - 1); 119 index = PetscMax(index, 0); 120 pivots[i] = keysin[index]; 121 } 122 } 123 /* sort the pivots in parallel */ 124 PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots)); 125 if (PetscDefined(USE_DEBUG)) { 126 PetscBool sorted; 127 128 PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted)); 129 PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed"); 130 } 131 132 /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select 133 * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */ 134 non_empty = size; 135 for (i = 0; i < size; i++) 136 if (mapout->range[i] == mapout->range[i + 1]) non_empty--; 137 PetscCall(PetscCalloc1(size + 1, &keys_per)); 138 my_first = -1; 139 if (non_empty) { 140 for (i = 0; i < size - 1; i++) { 141 size_t sample = (i + 1) * non_empty - 1; 142 size_t sample_rank = sample / (size - 1); 143 144 keys_per[sample_rank]++; 145 if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1)); 146 } 147 } 148 for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per); 149 PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots)); 150 /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array 151 * and allgather them */ 152 for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty]; 153 for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_INT_MAX; 154 PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm)); 155 for (i = 0, count = 0; i < size; i++) { 156 PetscInt j; 157 158 for (j = 0; j < max_keys_per; j++) { 159 if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j]; 160 } 161 } 162 *outpivots = finalpivots; 163 PetscCall(PetscFree(keys_per)); 164 PetscCall(PetscFree(pivots)); 165 PetscFunctionReturn(PETSC_SUCCESS); 166 } 167 168 static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[]) 169 { 170 PetscMPIInt size, rank; 171 PetscInt myOffset, nextOffset; 172 PetscCount total; 173 PetscMPIInt nfirst, nsecond; 174 PetscMPIInt firsttag, secondtag; 175 MPI_Request firstreqrcv; 176 MPI_Request *firstreqs; 177 MPI_Request *secondreqs; 178 179 PetscFunctionBegin; 180 PetscCallMPI(MPI_Comm_size(map->comm, &size)); 181 PetscCallMPI(MPI_Comm_rank(map->comm, &rank)); 182 PetscCall(PetscCommGetNewTag(map->comm, &firsttag)); 183 PetscCall(PetscCommGetNewTag(map->comm, &secondtag)); 184 PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs)); 185 PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm)); 186 myOffset = nextOffset - n; 187 total = map->range[rank + 1] - map->range[rank]; 188 if (total > 0) PetscCallMPI(MPIU_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv)); 189 nsecond = 0; 190 nfirst = 0; 191 for (PetscMPIInt i = 0; i < size; i++) { 192 PetscInt itotal; 193 PetscInt overlap, oStart, oEnd; 194 195 itotal = map->range[i + 1] - map->range[i]; 196 if (itotal <= 0) continue; 197 oStart = PetscMax(myOffset, map->range[i]); 198 oEnd = PetscMin(nextOffset, map->range[i + 1]); 199 overlap = oEnd - oStart; 200 if (map->range[i] >= myOffset && map->range[i] < nextOffset) { 201 /* send first message */ 202 PetscCallMPI(MPIU_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &firstreqs[nfirst++])); 203 } else if (overlap > 0) { 204 /* send second message */ 205 PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++])); 206 } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) { 207 /* send empty second message */ 208 PetscCallMPI(MPIU_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++])); 209 } 210 } 211 if (total > 0) { 212 MPI_Status status; 213 PetscMPIInt sender = -1; 214 PetscCount filled = 0; 215 216 PetscCallMPI(MPI_Wait(&firstreqrcv, &status)); 217 sender = status.MPI_SOURCE; 218 PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &filled)); 219 while (filled < total) { 220 PetscCount mfilled; 221 222 sender++; 223 PetscCallMPI(MPIU_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &status)); 224 PetscCallMPI(MPIU_Get_count(&status, MPIU_INT, &mfilled)); 225 filled += mfilled; 226 } 227 } 228 PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE)); 229 PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE)); 230 PetscCall(PetscFree2(firstreqs, secondreqs)); 231 PetscFunctionReturn(PETSC_SUCCESS); 232 } 233 234 static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 235 { 236 PetscMPIInt size, rank; 237 PetscInt *pivots = NULL, *buffer; 238 PetscInt j; 239 PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv; 240 241 PetscFunctionBegin; 242 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 243 PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank)); 244 PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv)); 245 /* sort locally */ 246 PetscCall(PetscSortInt(mapin->n, keysin)); 247 /* get P - 1 pivots */ 248 PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots)); 249 /* determine which entries in the sorted array go to which other processes based on the pivots */ 250 j = 0; 251 for (PetscMPIInt i = 0; i < size - 1; i++) { 252 PetscInt prev = j; 253 254 while ((j < mapin->n) && (keysin[j] < pivots[i])) j++; 255 PetscCall(PetscMPIIntCast(prev, &offsets_snd[i])); 256 PetscCall(PetscMPIIntCast(j - prev, &keys_per_snd[i])); 257 } 258 PetscCall(PetscMPIIntCast(j, &offsets_snd[size - 1])); 259 PetscCall(PetscMPIIntCast(mapin->n - j, &keys_per_snd[size - 1])); 260 PetscCall(PetscMPIIntCast(mapin->n, &offsets_snd[size])); 261 /* get the incoming sizes */ 262 PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm)); 263 offsets_rcv[0] = 0; 264 for (PetscMPIInt i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i]; 265 nrecv = offsets_rcv[size]; 266 /* all to all exchange */ 267 PetscCall(PetscMalloc1(nrecv, &buffer)); 268 PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm)); 269 PetscCall(PetscFree(pivots)); 270 PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv)); 271 272 /* local sort */ 273 PetscCall(PetscSortInt(nrecv, buffer)); 274 #if defined(PETSC_USE_DEBUG) 275 { 276 PetscBool sorted; 277 278 PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted)); 279 PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed"); 280 } 281 #endif 282 283 /* redistribute to the desired order */ 284 PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout)); 285 PetscCall(PetscFree(buffer)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 /*@ 290 PetscParallelSortInt - Globally sort a distributed array of integers 291 292 Collective 293 294 Input Parameters: 295 + mapin - `PetscLayout` describing the distribution of the input keys 296 . mapout - `PetscLayout` describing the desired distribution of the output keys 297 - keysin - the pre-sorted array of integers 298 299 Output Parameter: 300 . keysout - the array in which the sorted integers will be stored. If `mapin` == `mapout`, then `keysin` may be equal to `keysout`. 301 302 Level: developer 303 304 Notes: 305 306 This implements a distributed samplesort, which, with local array sizes n_in and n_out, 307 global size N, and global number of MPI processes P, does\: 308 .vb 309 - sorts locally 310 - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those 311 - using to the pivots to repartition the keys by all-to-all exchange 312 - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout) 313 - redistributing to match the mapout layout 314 .ve 315 316 If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`. 317 318 .seealso: `PetscSortInt()`, `PetscParallelSortedInt()` 319 @*/ 320 PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[]) 321 { 322 PetscMPIInt size; 323 PetscMPIInt result; 324 PetscInt *keysincopy = NULL; 325 326 PetscFunctionBegin; 327 PetscAssertPointer(mapin, 1); 328 PetscAssertPointer(mapout, 2); 329 PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result)); 330 PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator"); 331 PetscCall(PetscLayoutSetUp(mapin)); 332 PetscCall(PetscLayoutSetUp(mapout)); 333 if (mapin->n) PetscAssertPointer(keysin, 3); 334 if (mapout->n) PetscAssertPointer(keysout, 4); 335 PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N); 336 PetscCallMPI(MPI_Comm_size(mapin->comm, &size)); 337 if (size == 1) { 338 if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt))); 339 PetscCall(PetscSortInt(mapout->n, keysout)); 340 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 341 } 342 if (keysout != keysin) { 343 PetscCall(PetscMalloc1(mapin->n, &keysincopy)); 344 PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt))); 345 keysin = keysincopy; 346 } 347 PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout)); 348 #if defined(PETSC_USE_DEBUG) 349 { 350 PetscBool sorted; 351 352 PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted)); 353 PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed"); 354 } 355 #endif 356 PetscCall(PetscFree(keysincopy)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359