#include <petsc/private/petscimpl.h>
#include <petscis.h> /*I "petscis.h" I*/

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt((PetscInt)n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt)n, keys));
    }
    PetscFunctionReturn(0);
  }
  /* find the largest power of two strictly less than diff */
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(0);
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(0);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscFunctionBegin;
  PetscValidIntPointer(keys, 3);
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(0);
}
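/* Illustrative trace of the bitonic sort above (not part of the code path, just a worked
 * example): with ranks {0, 1, 2} and an ascending (forward) sort, the recursion splits at
 * mid = 1, so rank 0 sorts its keys descending while ranks {1, 2} sort ascending, leaving
 * a bitonic sequence across the ranks.  The top-level merge then uses split = 2, the
 * largest power of two below 3: rank 0 exchanges with rank 2 (keeping elementwise minima
 * while rank 2 keeps maxima), rank 1's partner 3 falls outside [0, 3) so it skips the
 * exchange, and the recursion finishes by merging {0, 1} and {2} separately. */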
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list. each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(0);
}
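/* Worked example of the selection arithmetic above (illustrative only): with P = 4
 * processes of which Z = 3 are nonempty, the parallel sort leaves (P - 1) * Z = 9 real
 * pivots at the front of the distributed array of (P - 1) * P = 12 suggestions (the
 * PETSC_MAX_INT sentinels sort to the end).  The selected samples are at global indices
 * 2, 5, and 8, which live on sample_rank 0, 1, and 2 respectively, so keys_per =
 * {1, 1, 1, 0} and each contributing rank finds its first selected pivot at local
 * offset my_first = 2. */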
/* Redistribute a globally sorted array, of which this rank holds the contiguous chunk
 * [myOffset, nextOffset), into the distribution described by map: the rank whose chunk
 * contains the first entry of destination i sends it a "first" message (received with
 * MPI_ANY_SOURCE), and the remaining pieces arrive as "second" messages from
 * consecutively increasing ranks, with empty ranks sending zero-length second messages
 * to keep that sequence unbroken */
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(0);
}
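/* Worked example of the protocol above (illustrative only): take 3 ranks whose sorted
 * chunks occupy global offsets {0:[0,4), 1:[4,4), 2:[4,8)} and a target layout with
 * ranges {0:[0,3), 1:[3,6), 2:[6,8)}.  Rank 0 owns the start of intervals [0,3) and
 * [3,6), so it sends first messages of lengths 3 and 1; rank 2 owns the start of [6,8),
 * sending itself a first message of length 2 and rank 1 a second message of length 2;
 * the empty rank 1 sits strictly inside [3,6), so it sends rank 1 a zero-length second
 * message, which keeps the receiver's walk over sender+1, sender+2, ... from blocking
 * on a rank that has nothing to contribute. */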
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(0);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - PetscLayout describing the distribution of the input keys
. mapout - PetscLayout describing the distribution of the output keys
- keysin - the array of input keys, distributed according to mapin (it need not be presorted)

  Output Parameter:
. keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does the following:

  - sorts locally
  - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort, then allgathering a subset of (P-1) of those
  - uses the pivots to repartition the keys by all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes to match the mapout layout

  If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.
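  Example usage (a minimal sketch, not from the PETSc test suite; comm and the local key count n are assumed to be set up by the caller, and the input and output layouts coincide):
.vb
  PetscLayout map;
  PetscInt   *keysin, *keysout;

  PetscCall(PetscLayoutCreate(comm, &map));
  PetscCall(PetscLayoutSetLocalSize(map, n));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscMalloc2(n, &keysin, n, &keysout));
  /* ... fill keysin with this process's unsorted keys ... */
  PetscCall(PetscParallelSortInt(map, map, keysin, keysout));
  /* keysout now holds this process's slice of the globally sorted keys */
  PetscCall(PetscFree2(keysin, keysout));
  PetscCall(PetscLayoutDestroy(&map));
.ve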
.seealso: `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  PetscFunctionBegin;
  PetscValidPointer(mapin, 1);
  PetscValidPointer(mapout, 2);
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  if (mapin->n) PetscValidIntPointer(keysin, 3);
  if (mapout->n) PetscValidIntPointer(keysout, 4);
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
    PetscCall(PetscSortInt(mapout->n, keysout));
    PetscFunctionReturn(0);
  }
  if (keysout != keysin) {
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(0);
}