#include <petsc/private/petscimpl.h>
#include <petscis.h> /*I "petscis.h" I*/

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt((PetscInt)n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt)n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
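
/* Worked illustration of the merge above (an example, not part of the algorithm): for
 * rankEnd - rankStart = 6 ranks, split is the largest power of two below the range size,
 * so split = 4 and mid = rankStart + 4.  Ranks 0 and 1 (relative to rankStart) pair with
 * ranks 4 and 5 for the elementwise compare-exchange of their length-n buffers, ranks 2
 * and 3 have partners outside the range and skip the exchange, and the merge then recurses
 * on the subranges [0, 4) and [4, 6).  This is the non-power-of-2 pattern described at the
 * URL above. */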

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscFunctionBegin;
  PetscAssertPointer(keys, 3);
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ..., (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(PETSC_SUCCESS);
}
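
/* Worked example for the pivot selection in PetscParallelSampleSelect() above (illustrative
 * numbers only): with P = 4 processes of which Z = 3 are nonempty, the parallel bitonic sort
 * leaves (P - 1) * Z = 9 finite pivot suggestions followed by 3 copies of PETSC_MAX_INT.
 * The selected global indices are Z - 1 = 2, 2*Z - 1 = 5, and 3*Z - 1 = 8, which live at
 * local index 2 on ranks 0, 1, and 2 respectively, so keys_per = {1, 1, 1, 0} and the
 * allgather plus compaction yields the final P - 1 = 3 pivots. */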

static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &firstreqs[nfirst++]));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++]));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &secondreqs[nsecond++]));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
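
/* Note on the protocol above: every piece of the globally sorted array that lands on a given
 * receiver comes from a contiguous run of sender ranks, because the senders' offsets are a
 * prefix sum over ranks.  The receiver therefore posts one MPI_ANY_SOURCE receive for the
 * "first" message (the one that starts exactly at its range start), learns the lowest sender
 * rank from the receive status, and then receives the remaining pieces from sender + 1,
 * sender + 2, and so on under the second tag.  Empty ranks whose offset falls strictly inside
 * a receiver's range send zero-length second messages so this rank-order scan never stalls. */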

static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin  - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the array of integers to sort; it need not be locally sorted on input

  Output Parameter:
. keysout - the array in which the sorted integers will be stored. If `mapin` == `mapout`, then `keysin` may be equal to `keysout`.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out,
  global size N, and global number of MPI processes P, does\:
.vb
  - sorts the keys locally
  - chooses pivots by sorting (in parallel, with bitonic sort) P - 1 pivot suggestions from each process and allgathering a subset of P - 1 of those
  - uses the pivots to repartition the keys with an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes the keys to match the mapout layout
.ve

  If `keysin` != `keysout`, then `keysin` will not be changed during `PetscParallelSortInt()`.
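
  Example usage, as a minimal sketch (the local sizes `n_in` and `n_out` and the buffers
  `keysin` and `keysout` are placeholders; the local sizes must sum to the same global total
  in both layouts)\:
.vb
  PetscLayout mapin, mapout;

  PetscCall(PetscLayoutCreateFromSizes(comm, n_in, PETSC_DECIDE, 1, &mapin));
  PetscCall(PetscLayoutCreateFromSizes(comm, n_out, PETSC_DECIDE, 1, &mapout));
  PetscCall(PetscParallelSortInt(mapin, mapout, keysin, keysout));
  PetscCall(PetscLayoutDestroy(&mapin));
  PetscCall(PetscLayoutDestroy(&mapout));
.ve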

.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`, `PetscLayoutCreateFromSizes()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  PetscFunctionBegin;
  PetscAssertPointer(mapin, 1);
  PetscAssertPointer(mapout, 2);
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  if (mapin->n) PetscAssertPointer(keysin, 3);
  if (mapout->n) PetscAssertPointer(keysout, 4);
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    /* sequential case: copy if needed, sort locally, and return */
    if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
    PetscCall(PetscSortInt(mapout->n, keysout));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (keysout != keysin) {
    /* the samplesort sorts keysin in place, so work on a copy to leave the input unchanged */
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(PETSC_SUCCESS);
}