#include <petsc/private/petscimpl.h>
#include <petscis.h> /*I "petscis.h" I*/

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
/* One rank's participation in a bitonic merge across the ranks [rankStart, rankEnd) of comm.
   Each participating rank holds n keys; partners exchange their full arrays with MPI_Sendrecv
   and keep the element-wise minima on one side and maxima on the other, so every rank must pass
   the same n for the send/receive counts to match.  buffer is rank-local scratch of length n.
   forward selects ascending (PETSC_TRUE) vs. descending order across ranks. */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    /* single rank left: finish with a local sort in the requested direction */
    if (forward) {
      PetscCall(PetscSortInt((PetscInt) n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt) n, keys));
    }
    PetscFunctionReturn(0);
  }
  /* split = largest power of two strictly less than diff (handles non-power-of-2 rank counts) */
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    /* compare-exchange with the partner: the lower rank keeps the minima when sorting
       forward (and vice versa), which is the bitonic compare step */
    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
      }
    } else {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
      }
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(0);
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
/* Recursive bitonic sort over the ranks [rankStart, rankEnd): sort the lower half of the
   ranks in the opposite direction and the upper half in the requested direction, producing
   a bitonic sequence, then merge.  Same same-n-on-every-rank requirement as the merge. */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(0);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer: note the direction flip for the lower half, which is what makes
     the concatenation of the two halves bitonic before the merge */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(0);
}

/* Entry point for the parallel bitonic sort: every rank contributes n keys (in place).
   Because the merge exchanges fixed-size arrays, all ranks must pass the same n; the only
   caller in this file uses n = size - 1 (the pivot suggestions).  Allocates an n-entry
   scratch buffer for the duration of the sort. */
static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt *buffer;

  PetscFunctionBegin;
  PetscValidIntPointer(keys, 3);
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(0);
}

/* Choose P - 1 global splitter pivots for samplesort.  Each rank proposes P - 1 pivots from
   its locally sorted keysin (or PETSC_MAX_INT sentinels if it holds no keys), the proposals
   are sorted in parallel with the bitonic sort, an evenly spaced subset is selected, and the
   final pivots are allgathered.  On return *outpivots is a freshly allocated array owned by
   the caller (freed by the caller with PetscFree). */
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt size, rank;
  PetscInt *pivots, *finalpivots, i;
  PetscInt non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list. each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
      index = PetscMin(index, (mapin->n - 1));
      index = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted,mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  /* NOTE(review): the sentinel pivots above are inserted when mapin->n == 0, but non_empty is
   * counted from mapout->range here.  These agree only if mapin and mapout have the same set of
   * empty ranks -- verify against upstream, or count from mapin->range if they may differ. */
  non_empty = size;
  for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    /* walk the P - 1 global selection indices; sample_rank is the rank whose sorted block of
     * (P - 1) pivots contains that index, my_first is the local offset of this rank's first one */
    for (i = 0; i < size - 1; i++) {
      size_t sample = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
        my_first = (PetscInt) (sample - sample_rank * (size - 1));
      }
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  /* compact the fixed-stride allgather result in place, dropping each rank's padding sentinels */
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) {
        finalpivots[count++] = finalpivots[i * max_keys_per + j];
      }
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(0);
}

/* Redistribute a globally sorted but arbitrarily partitioned array (this rank holds n entries,
   arrayin) into the layout described by map (this rank receives map->range[rank+1]-map->range[rank]
   entries into arrayout).  Protocol: the rank whose input range covers the start of a receiver's
   output range sends a "first" message; any remaining overlap arrives as "second" messages from
   the consecutively following ranks.  The receiver posts one wildcard Irecv for the first message,
   learns the sender from its status, then drains the rest with ordered Recvs from sender+1,
   sender+2, ... until its output slots are filled. */
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt size, rank;
  PetscInt myOffset, nextOffset;
  PetscInt i;
  PetscMPIInt total, filled;
  PetscMPIInt sender, nfirst, nsecond;
  PetscMPIInt firsttag, secondtag;
  MPI_Request firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  /* prefix sum of the local counts gives this rank's global span [myOffset, nextOffset) in arrayin */
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) {
    /* post the wildcard receive for the "first" message before issuing any sends */
    PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  }
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    /* overlap of this rank's input span with rank i's desired output range */
    oStart = PetscMax(myOffset, map->range[i]);
    oEnd = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message so the receiver's ordered drain below does not stall on this rank */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  /* contributions beyond the first come from consecutively increasing ranks */
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(0);
}

/* The samplesort driver: local sort, pivot selection, pivot-based all-to-all repartition,
   local sort of the received keys, then redistribution into the mapout layout.  keysin is
   overwritten by the local sort; keysout receives the final result. */
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size, rank;
  PetscInt *pivots = NULL, *buffer;
  PetscInt i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots:
   * keys strictly below pivots[i] (and at or above pivots[i-1]) go to rank i */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i] = prev;
    keys_per_snd[i] = j - prev;
  }
  /* everything at or above the last pivot goes to the last rank */
  offsets_snd[size - 1] = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size] = mapin->n;
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) {
    offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
  }
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted,mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(0);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - PetscLayout describing the distribution of the input keys
. mapout - PetscLayout describing the distribution of the output keys
- keysin - the pre-sorted array of integers

  Output Parameter:
. keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:

  - sorts locally
  - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
  - using to the pivots to repartition the keys by all-to-all exchange
  - sorting the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributing to match the mapout layout

  If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.

.seealso: `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt *keysincopy = NULL;

  PetscFunctionBegin;
  PetscValidPointer(mapin, 1);
  PetscValidPointer(mapout, 2);
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT,mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  if (mapin->n) PetscValidIntPointer(keysin, 3);
  if (mapout->n) PetscValidIntPointer(keysout, 4);
  PetscCheck(mapin->N == mapout->N,mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    /* uniprocessor case: copy (if out-of-place) and sort locally, no communication needed */
    if (keysout != keysin) {
      PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
    }
    PetscCall(PetscSortInt(mapout->n, keysout));
    /* NOTE(review): this inner check is tautological inside the size == 1 branch; the return
     * is always taken here */
    if (size == 1) PetscFunctionReturn(0);
  }
  /* work on a copy so the caller's keysin is left untouched, as documented above */
  if (keysout != keysin) {
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted,mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(0);
}