#include <petsc/private/petscimpl.h>
#include <petscis.h> /*I "petscis.h" I*/

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt       diff;
  PetscInt       split, mid, partner;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    if (forward) {
      ierr = PetscSortInt((PetscInt) n, keys);CHKERRQ(ierr);
    } else {
      ierr = PetscSortReverseInt((PetscInt) n, keys);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    ierr = MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);CHKERRQ(ierr);
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
      }
    } else {
      for (i = 0; i < n; i++) {
        keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
      }
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);CHKERRQ(ierr);
  } else {
    ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt       diff;
  PetscInt       mid;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(0);
  if (diff == 1) {
    if (forward) {
      ierr = PetscSortInt(n, keys);CHKERRQ(ierr);
    } else {
      ierr = PetscSortReverseInt(n, keys);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool) !forward);CHKERRQ(ierr);
  } else {
    ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
  }
  /* bitonic merge */
  ierr = PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt    size, rank, tag, mpin;
  PetscInt       *buffer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidIntPointer(keys, 3);
  ierr = PetscCommGetNewTag(comm, &tag);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(n, &mpin);CHKERRQ(ierr);
  ierr = PetscMalloc1(n, &buffer);CHKERRQ(ierr);
  ierr = PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(buffer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

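/* Select P - 1 global pivots for the samplesort: every process proposes P - 1 pivots sampled from
 * its locally sorted keys (spaced to approximate the target layout mapout; empty processes
 * contribute PETSC_MAX_INT sentinels that sort to the end and are skipped), the proposals are
 * sorted in parallel with the bitonic sort above, P - 1 evenly spaced entries of the remaining
 * proposals are selected, and the selection is allgathered so that every process returns the same
 * pivots in *outpivots (freed by the caller). */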
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt    size, rank;
  PetscInt       *pivots, *finalpivots, i;
  PetscInt       non_empty, my_first, count;
  PetscMPIInt    *keys_per, max_keys_per;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr);

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  ierr = PetscMalloc1(size - 1, &pivots);CHKERRQ(ierr);
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index = ((PetscReal) mapout->range[i + 1]) * ((PetscReal) mapin->n) / ((PetscReal) mapout->N);
      index = PetscMin(index, (mapin->n - 1));
      index = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  ierr = PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    ierr = PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }
#endif

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++) if (mapout->range[i] == mapout->range[i+1]) non_empty--;
  ierr = PetscCalloc1(size + 1, &keys_per);CHKERRQ(ierr);
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt) sample_rank == rank) {
        my_first = (PetscInt) (sample - sample_rank * (size - 1));
      }
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  ierr = PetscMalloc1(size * max_keys_per, &finalpivots);CHKERRQ(ierr);
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  ierr = MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);CHKERRQ(ierr);
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) {
        finalpivots[count++] = finalpivots[i * max_keys_per + j];
      }
    }
  }
  *outpivots = finalpivots;
  ierr = PetscFree(keys_per);CHKERRQ(ierr);
  ierr = PetscFree(pivots);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

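/* Redistribute an array that is globally sorted but arbitrarily partitioned into the layout
 * described by map.  Each process locates its contiguous chunk with an MPI_Scan of the local
 * sizes, sends the part of that chunk overlapping each destination's range (the piece containing
 * the start of a destination's range goes out with firsttag, any remaining pieces with secondtag),
 * and receives into arrayout until its own range [map->range[rank], map->range[rank+1]) is filled. */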
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt    size, rank;
  PetscInt       myOffset, nextOffset;
  PetscInt       i;
  PetscMPIInt    total, filled;
  PetscMPIInt    sender, nfirst, nsecond;
  PetscMPIInt    firsttag, secondtag;
  MPI_Request    firstreqrcv;
  MPI_Request    *firstreqs;
  MPI_Request    *secondreqs;
  MPI_Status     firststatus;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(map->comm, &rank);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(map->comm, &firsttag);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(map->comm, &secondtag);CHKERRQ(ierr);
  myOffset = 0;
  ierr = PetscMalloc2(size, &firstreqs, size, &secondreqs);CHKERRQ(ierr);
  ierr = MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);CHKERRQ(ierr);
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) {
    ierr = MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);CHKERRQ(ierr);
  }
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart = PetscMax(myOffset, map->range[i]);
    oEnd = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      ierr = MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));CHKERRQ(ierr);
    } else if (overlap > 0) {
      /* send second message */
      ierr = MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr);
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      ierr = MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));CHKERRQ(ierr);
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    ierr = MPI_Wait(&firstreqrcv, &firststatus);CHKERRQ(ierr);
    sender = firststatus.MPI_SOURCE;
    ierr = MPI_Get_count(&firststatus, MPIU_INT, &filled);CHKERRQ(ierr);
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    ierr = MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);CHKERRQ(ierr);
    ierr = MPI_Get_count(&stat, MPIU_INT, &mfilled);CHKERRQ(ierr);
    filled += mfilled;
  }
  ierr = MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(firstreqs, secondreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

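/* The samplesort worker: sort the local keys, select P - 1 global pivots with
 * PetscParallelSampleSelect(), bucket the local keys by pivot and exchange the buckets with
 * MPI_Alltoallv(), sort the received keys locally (the array is now globally sorted, but in a
 * data-dependent partition), and finally use PetscParallelRedistribute() to match mapout. */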
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt    size, rank;
  PetscInt       *pivots = NULL, *buffer;
  PetscInt       i, j;
  PetscMPIInt    *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mapin->comm, &rank);CHKERRQ(ierr);
  ierr = PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);CHKERRQ(ierr);
  /* sort locally */
  ierr = PetscSortInt(mapin->n, keysin);CHKERRQ(ierr);
  /* get P - 1 pivots */
  ierr = PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);CHKERRQ(ierr);
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i] = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1] = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size] = mapin->n;
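  /* Illustrative example (hypothetical values, not taken from any particular run): with size = 3,
   * locally sorted keysin = {1, 4, 7, 9} and pivots = {3, 8}, the loop above yields
   * keys_per_snd = {1, 2, 1} and offsets_snd = {0, 1, 3, 4}: key 1 is sent to rank 0, keys {4, 7}
   * to rank 1, and key 9 to rank 2.  Keys equal to a pivot go to the higher rank because the
   * comparison above is strict. */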
  /* get the incoming sizes */
  ierr = MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);CHKERRQ(ierr);
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) {
    offsets_rcv[i+1] = offsets_rcv[i] + keys_per_rcv[i];
  }
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  ierr = PetscMalloc1(nrecv, &buffer);CHKERRQ(ierr);
  ierr = MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);CHKERRQ(ierr);
  ierr = PetscFree(pivots);CHKERRQ(ierr);
  ierr = PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);CHKERRQ(ierr);

  /* local sort */
  ierr = PetscSortInt(nrecv, buffer);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    ierr = PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  ierr = PetscParallelRedistribute(mapout, nrecv, buffer, keysout);CHKERRQ(ierr);
  ierr = PetscFree(buffer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - PetscLayout describing the distribution of the input keys
. mapout - PetscLayout describing the distribution of the output keys
- keysin - the input array of integers (it does not need to be sorted on entry)

  Output Parameter:
. keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes: This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:

  - sorts locally
  - chooses pivots by sorting (in parallel) (P - 1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P - 1) of those
  - uses the pivots to repartition the keys by an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes to match the mapout layout

  If keysin != keysout, then keysin will not be changed during PetscParallelSortInt.

.seealso: PetscParallelSortedInt()
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt    size;
  PetscMPIInt    result;
  PetscInt       *keysincopy = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidPointer(mapin, 1);
  PetscValidPointer(mapout, 2);
  ierr = MPI_Comm_compare(mapin->comm, mapout->comm, &result);CHKERRQ(ierr);
  if (result != MPI_IDENT && result != MPI_CONGRUENT) SETERRQ(mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  ierr = PetscLayoutSetUp(mapin);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mapout);CHKERRQ(ierr);
  if (mapin->n) PetscValidIntPointer(keysin, 3);
  if (mapout->n) PetscValidIntPointer(keysout, 4);
  if (mapin->N != mapout->N) SETERRQ2(mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%D != %D)", mapin->N, mapout->N);
  ierr = MPI_Comm_size(mapin->comm, &size);CHKERRQ(ierr);
  if (size == 1) {
    if (keysout != keysin) {
      ierr = PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
    }
    ierr = PetscSortInt(mapout->n, keysout);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  if (keysout != keysin) {
    ierr = PetscMalloc1(mapin->n, &keysincopy);CHKERRQ(ierr);
    ierr = PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));CHKERRQ(ierr);
    keysin = keysincopy;
  }
  ierr = PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    ierr = PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  ierr = PetscFree(keysincopy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
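
/* A minimal usage sketch for PetscParallelSortInt() (illustrative only: the variables comm, N,
 * and keys below are hypothetical, and error handling follows the ierr/CHKERRQ pattern used in
 * this file).  Sorting in place with a single layout:
 *
 *   PetscLayout map;
 *   PetscInt    *keys, nlocal, i;
 *
 *   ierr = PetscLayoutCreate(comm, &map);CHKERRQ(ierr);
 *   ierr = PetscLayoutSetSize(map, N);CHKERRQ(ierr);            (global size; local sizes left to PETSc)
 *   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
 *   ierr = PetscLayoutGetLocalSize(map, &nlocal);CHKERRQ(ierr);
 *   ierr = PetscMalloc1(nlocal, &keys);CHKERRQ(ierr);
 *   for (i = 0; i < nlocal; i++) keys[i] = ...;                 (fill with this process's unsorted keys)
 *   ierr = PetscParallelSortInt(map, map, keys, keys);CHKERRQ(ierr);  (mapin == mapout, so in-place is allowed)
 *   ierr = PetscFree(keys);CHKERRQ(ierr);
 *   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
 */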