1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5 { 6 PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right; 7 return (l < r) ? -1 : (l > r); 8 } 9 10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11 { 12 PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right; 13 return (l < r) ? -1 : (l > r); 14 } 15 16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17 { 18 PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right; 19 return (l < r) ? -1 : (l > r); 20 } 21 22 #define MIN_GALLOP_CONST_GLOBAL 8 23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 24 typedef int (*CompFunc)(const void *, const void *, void *); 25 26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 27 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__) 28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29 { 30 __builtin_memcpy(t, b, size); 31 __builtin_memmove(b, a, size); 32 __builtin_memcpy(a, t, size); 33 return; 34 } 35 36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 37 { 38 __builtin_memcpy(t, ar, asize); 39 __builtin_memmove(ar, al, asize); 40 __builtin_memcpy(al, t, asize); 41 __builtin_memcpy(t, br, bsize); 42 __builtin_memmove(br, bl, bsize); 43 __builtin_memcpy(bl, t, bsize); 44 return; 45 } 46 47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 48 { 49 __builtin_memcpy(dest, src, size); 50 return; 51 } 52 53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54 { 55 __builtin_memcpy(adest, asrc, asize); 56 __builtin_memcpy(bdest, bsrc, bsize); 57 return; 58 } 59 60 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 61 { 62 __builtin_memmove(dest, src, size); 63 return; 64 } 65 66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67 { 68 __builtin_memmove(adest, asrc, asize); 69 __builtin_memmove(bdest, bsrc, bsize); 70 return; 71 } 72 #else 73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74 { 75 PetscFunctionBegin; 76 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size)); 77 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size)); 78 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size)); 79 PetscFunctionReturnVoid(); 80 } 81 82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 83 { 84 PetscFunctionBegin; 85 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize)); 86 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize)); 87 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize)); 88 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize)); 89 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize)); 90 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize)); 91 PetscFunctionReturnVoid(); 92 } 93 94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 95 { 96 PetscFunctionBegin; 97 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size)); 98 PetscFunctionReturnVoid(); 99 } 100 101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 102 { 103 PetscFunctionBegin; 104 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize)); 105 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize)); 106 PetscFunctionReturnVoid(); 107 } 108 109 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 110 { 111 PetscFunctionBegin; 112 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size)); 113 PetscFunctionReturnVoid(); 114 } 115 116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 117 { 118 PetscFunctionBegin; 119 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize)); 120 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize)); 121 PetscFunctionReturnVoid(); 122 } 123 #endif 124 125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 126 x. Output also inclusive. 127 128 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 129 130 {0,1,2,3,4,5} 131 132 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 133 */ 134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 135 { 136 PetscInt last = l, k = 1, mid, cur = l + 1; 137 138 PetscFunctionBegin; 139 *m = l; 140 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l); 141 if ((*cmp)(x, arr + r * size, ctx) >= 0) { 142 *m = r; 143 PetscFunctionReturn(PETSC_SUCCESS); 144 } 145 if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 146 while (PETSC_TRUE) { 147 if (PetscUnlikely(cur > r)) { 148 cur = r; 149 break; 150 } 151 if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break; 152 last = cur; 153 cur += (k <<= 1) + 1; 154 ++k; 155 } 156 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 157 while (cur > last + 1) { 158 mid = last + ((cur - last) >> 1); 159 if ((*cmp)(x, (arr) + mid * size, ctx) < 0) { 160 cur = mid; 161 } else { 162 last = mid; 163 } 164 } 165 *m = cur; 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 170 < x. Output also inclusive */ 171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 172 { 173 PetscInt last = r, k = 1, mid, cur = r - 1; 174 175 PetscFunctionBegin; 176 *m = r; 177 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l); 178 if ((*cmp)(x, arr + l * size, ctx) <= 0) { 179 *m = l; 180 PetscFunctionReturn(PETSC_SUCCESS); 181 } 182 if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 183 while (PETSC_TRUE) { 184 if (PetscUnlikely(cur < l)) { 185 cur = l; 186 break; 187 } 188 if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break; 189 last = cur; 190 cur -= (k <<= 1) + 1; 191 ++k; 192 } 193 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 194 while (last > cur + 1) { 195 mid = last - ((last - cur) >> 1); 196 if ((*cmp)(x, (arr) + mid * size, ctx) > 0) { 197 cur = mid; 198 } else { 199 last = mid; 200 } 201 } 202 *m = cur; 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 207 complete array, left is first index of left array, mid is first index of right array, right is last index of right 208 array */ 209 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 210 { 211 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 212 const PetscInt llen = mid - left; 213 214 PetscFunctionBegin; 215 Petsc_memcpy(tarr, arr + (left * size), llen * size); 216 while ((i < llen) && (j <= right)) { 217 if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) { 218 Petsc_memcpy(arr + (k * size), tarr + (i * size), size); 219 ++k; 220 gallopright = 0; 221 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 222 PetscInt l1, l2, diff1, diff2; 223 do { 224 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 225 /* search temp for right[j], can move up to that of temp into arr immediately */ 226 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 227 diff1 = l1 - i; 228 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 229 k += diff1; 230 i = l1; 231 /* search right for temp[i], can move up to that many of right into arr */ 232 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 233 diff2 = l2 - j; 234 Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size); 235 k += diff2; 236 j = l2; 237 if (i >= llen || j > right) break; 238 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 239 ++MIN_GALLOP_GLOBAL; 240 } 241 } else { 242 Petsc_memmove(arr + (k * size), arr + (j * size), size); 243 ++k; 244 gallopleft = 0; 245 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 246 PetscInt l1, l2, diff1, diff2; 247 do { 248 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 249 /* search right for temp[i], can move up to that many of right into arr */ 250 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 251 diff2 = l2 - j; 252 Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size); 253 k += diff2; 254 j = l2; 255 /* search temp for right[j], can copy up to that of temp into arr immediately */ 256 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 257 diff1 = l1 - i; 258 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 259 k += diff1; 260 i = l1; 261 if (i >= llen || j > right) break; 262 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 263 ++MIN_GALLOP_GLOBAL; 264 } 265 } 266 } 267 if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 272 { 273 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 274 const PetscInt llen = mid - left; 275 276 PetscFunctionBegin; 277 Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize); 278 while ((i < llen) && (j <= right)) { 279 if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) { 280 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize); 281 ++k; 282 gallopright = 0; 283 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 284 PetscInt l1, l2, diff1, diff2; 285 do { 286 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 287 /* search temp for right[j], can move up to that of temp into arr immediately */ 288 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 289 diff1 = l1 - i; 290 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 291 k += diff1; 292 i = l1; 293 /* search right for temp[i], can move up to that many of right into arr */ 294 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 295 diff2 = l2 - j; 296 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 297 k += diff2; 298 j = l2; 299 if (i >= llen || j > right) break; 300 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 301 ++MIN_GALLOP_GLOBAL; 302 } 303 } else { 304 Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize); 305 ++k; 306 gallopleft = 0; 307 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 308 PetscInt l1, l2, diff1, diff2; 309 do { 310 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 311 /* search right for temp[i], can move up to that many of right into arr */ 312 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 313 diff2 = l2 - j; 314 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 315 k += diff2; 316 j = l2; 317 /* search temp for right[j], can copy up to that of temp into arr immediately */ 318 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 319 diff1 = l1 - i; 320 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 321 k += diff1; 322 i = l1; 323 if (i >= llen || j > right) break; 324 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 325 ++MIN_GALLOP_GLOBAL; 326 } 327 } 328 } 329 if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize); 330 PetscFunctionReturn(PETSC_SUCCESS); 331 } 332 333 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 334 complete array, left is first index of left array, mid is first index of right array, right is last index of right 335 array */ 336 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 337 { 338 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 339 const PetscInt rlen = right - mid + 1; 340 341 PetscFunctionBegin; 342 Petsc_memcpy(tarr, (arr) + mid * size, rlen * size); 343 while ((i >= 0) && (j >= left)) { 344 if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) { 345 Petsc_memcpy((arr) + k * size, (tarr) + i * size, size); 346 --k; 347 gallopleft = 0; 348 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 349 PetscInt l1, l2, diff1, diff2; 350 do { 351 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 352 /* search temp for left[j], can copy up to that many of temp into arr */ 353 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 354 diff1 = i - l1; 355 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 356 k -= diff1; 357 i = l1; 358 /* search left for temp[i], can move up to that many of left up arr */ 359 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 360 diff2 = j - l2; 361 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 362 k -= diff2; 363 j = l2; 364 if (i < 0 || j < left) break; 365 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 366 ++MIN_GALLOP_GLOBAL; 367 } 368 } else { 369 Petsc_memmove((arr) + k * size, (arr) + j * size, size); 370 --k; 371 gallopright = 0; 372 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 373 PetscInt l1, l2, diff1, diff2; 374 do { 375 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 376 /* search left for temp[i], can move up to that many of left up arr */ 377 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 378 diff2 = j - l2; 379 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 380 k -= diff2; 381 j = l2; 382 /* search temp for left[j], can copy up to that many of temp into arr */ 383 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 384 diff1 = i - l1; 385 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 386 k -= diff1; 387 i = l1; 388 if (i < 0 || j < left) break; 389 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 390 ++MIN_GALLOP_GLOBAL; 391 } 392 } 393 } 394 if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 399 { 400 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 401 const PetscInt rlen = right - mid + 1; 402 403 PetscFunctionBegin; 404 Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize); 405 while ((i >= 0) && (j >= left)) { 406 if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) { 407 Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize); 408 --k; 409 gallopleft = 0; 410 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 411 PetscInt l1, l2, diff1, diff2; 412 do { 413 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 414 /* search temp for left[j], can copy up to that many of temp into arr */ 415 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 416 diff1 = i - l1; 417 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 418 k -= diff1; 419 i = l1; 420 /* search left for temp[i], can move up to that many of left up arr */ 421 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 422 diff2 = j - l2; 423 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 424 k -= diff2; 425 j = l2; 426 if (i < 0 || j < left) break; 427 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 428 ++MIN_GALLOP_GLOBAL; 429 } 430 } else { 431 Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize); 432 --k; 433 gallopright = 0; 434 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 435 PetscInt l1, l2, diff1, diff2; 436 do { 437 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 438 /* search left for temp[i], can move up to that many of left up arr */ 439 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 440 diff2 = j - l2; 441 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 442 k -= diff2; 443 j = l2; 444 /* search temp for left[j], can copy up to that many of temp into arr */ 445 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 446 diff1 = i - l1; 447 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 448 k -= diff1; 449 i = l1; 450 if (i < 0 || j < left) break; 451 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 452 ++MIN_GALLOP_GLOBAL; 453 } 454 } 455 } 456 if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 461 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 462 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 463 { 464 PetscInt i = start == left ? start + 1 : start; 465 466 PetscFunctionBegin; 467 for (; i <= right; ++i) { 468 PetscInt j = i - 1; 469 Petsc_memcpy(tarr, arr + (i * size), size); 470 while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) { 471 COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size); 472 --j; 473 } 474 Petsc_memcpy((arr) + (j + 1) * size, tarr, size); 475 } 476 PetscFunctionReturn(PETSC_SUCCESS); 477 } 478 479 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 480 { 481 PetscInt i = start == left ? start + 1 : start; 482 483 PetscFunctionBegin; 484 for (; i <= right; ++i) { 485 PetscInt j = i - 1; 486 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 487 while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) { 488 COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize); 489 --j; 490 } 491 Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize); 492 } 493 PetscFunctionReturn(PETSC_SUCCESS); 494 } 495 496 /* See PetscInsertionSort_Private */ 497 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 498 { 499 PetscInt i = start == left ? start + 1 : start; 500 501 PetscFunctionBegin; 502 for (; i <= right; ++i) { 503 PetscInt l = left, r = i; 504 Petsc_memcpy(tarr, arr + (i * size), size); 505 do { 506 const PetscInt m = l + ((r - l) >> 1); 507 if ((*cmp)(tarr, arr + (m * size), ctx) < 0) { 508 r = m; 509 } else { 510 l = m + 1; 511 } 512 } while (l < r); 513 Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size); 514 Petsc_memcpy(arr + (l * size), tarr, size); 515 } 516 PetscFunctionReturn(PETSC_SUCCESS); 517 } 518 519 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 520 { 521 PetscInt i = start == left ? start + 1 : start; 522 523 PetscFunctionBegin; 524 for (; i <= right; ++i) { 525 PetscInt l = left, r = i; 526 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 527 do { 528 const PetscInt m = l + ((r - l) >> 1); 529 if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) { 530 r = m; 531 } else { 532 l = m + 1; 533 } 534 } while (l < r); 535 Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize); 536 Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize); 537 } 538 PetscFunctionReturn(PETSC_SUCCESS); 539 } 540 541 typedef struct { 542 PetscInt size; 543 PetscInt start; 544 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 545 } PetscTimSortStack; 546 #else 547 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt)); 548 #endif 549 550 typedef struct { 551 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 552 size_t size; 553 size_t maxsize; 554 } PetscTimSortBuffer; 555 556 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) 557 { 558 PetscFunctionBegin; 559 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS); 560 { 561 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 562 size_t newMax = PetscMin(newSize * newSize, buff->maxsize); 563 PetscCall(PetscFree(buff->ptr)); 564 PetscCall(PetscMalloc1(newMax, &buff->ptr)); 565 buff->size = newMax; 566 } 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) 571 { 572 PetscFunctionBegin; 573 for (; stacksize; --stacksize) { 574 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 575 if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) { 576 PetscInt l, m = stack[stacksize].start, r; 577 /* Search A for B[0] insertion */ 578 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l)); 579 /* Search B for A[-1] insertion */ 580 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r)); 581 if (m - l <= r - m) { 582 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 583 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 584 } else { 585 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 586 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 587 } 588 } 589 /* Update A with merge */ 590 stack[stacksize - 1].size += stack[stacksize].size; 591 } 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) 596 { 597 PetscFunctionBegin; 598 for (; stacksize; --stacksize) { 599 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 600 if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) { 601 PetscInt l, m = stack[stacksize].start, r; 602 /* Search A for B[0] insertion */ 603 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l)); 604 /* Search B for A[-1] insertion */ 605 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r)); 606 if (m - l <= r - m) { 607 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 608 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 609 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 610 } else { 611 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 612 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 613 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 614 } 615 } 616 /* Update A with merge */ 617 stack[stacksize - 1].size += stack[stacksize].size; 618 } 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 622 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) 623 { 624 PetscInt i = *stacksize; 625 626 PetscFunctionBegin; 627 while (i) { 628 PetscInt l, m, r, itemp = i; 629 630 if (i == 1) { 631 /* A = stack[i-1], B = stack[i] */ 632 if (stack[i - 1].size < stack[i].size) { 633 /* if A[-1] <= B[0] then sorted */ 634 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 635 m = stack[i].start; 636 /* Search A for B[0] insertion */ 637 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l)); 638 /* Search B for A[-1] insertion */ 639 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r)); 640 if (m - l <= r - m) { 641 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 642 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 643 } else { 644 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 645 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 646 } 647 } 648 /* Update A with merge */ 649 stack[i - 1].size += stack[i].size; 650 --i; 651 } 652 } else { 653 /* i > 2, i.e. C exists 654 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 655 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 656 if (stack[i - 2].size < stack[i].size) { 657 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 658 if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) { 659 m = stack[i - 1].start; 660 /* Search A for B[0] insertion */ 661 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l)); 662 /* Search B for A[-1] insertion */ 663 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r)); 664 if (m - l <= r - m) { 665 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 666 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 667 } else { 668 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 669 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 670 } 671 } 672 /* Update A with merge */ 673 stack[i - 2].size += stack[i - 1].size; 674 /* Push C up the stack */ 675 stack[i - 1].start = stack[i].start; 676 stack[i - 1].size = stack[i].size; 677 } else { 678 /* merge C into B */ 679 mergeBC: 680 /* If B[-1] <= C[0] then... you know the drill */ 681 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 682 m = stack[i].start; 683 /* Search B for C[0] insertion */ 684 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l)); 685 /* Search C for B[-1] insertion */ 686 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r)); 687 if (m - l <= r - m) { 688 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 689 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 690 } else { 691 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 692 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 693 } 694 } 695 /* Update B with merge */ 696 stack[i - 1].size += stack[i].size; 697 } 698 --i; 699 } else if (stack[i - 1].size <= stack[i].size) { 700 /* merge C into B */ 701 goto mergeBC; 702 } 703 } 704 if (itemp == i) break; 705 } 706 *stacksize = i; 707 PetscFunctionReturn(PETSC_SUCCESS); 708 } 709 710 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) 711 { 712 PetscInt i = *stacksize; 713 714 PetscFunctionBegin; 715 while (i) { 716 PetscInt l, m, r, itemp = i; 717 718 if (i == 1) { 719 /* A = stack[i-1], B = stack[i] */ 720 if (stack[i - 1].size < stack[i].size) { 721 /* if A[-1] <= B[0] then sorted */ 722 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 723 m = stack[i].start; 724 /* Search A for B[0] insertion */ 725 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l)); 726 /* Search B for A[-1] insertion */ 727 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r)); 728 if (m - l <= r - m) { 729 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 730 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 731 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 732 } else { 733 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 734 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 735 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 736 } 737 } 738 /* Update A with merge */ 739 stack[i - 1].size += stack[i].size; 740 --i; 741 } 742 } else { 743 /* i > 2, i.e. C exists 744 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 745 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 746 if (stack[i - 2].size < stack[i].size) { 747 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 748 if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) { 749 m = stack[i - 1].start; 750 /* Search A for B[0] insertion */ 751 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l)); 752 /* Search B for A[-1] insertion */ 753 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r)); 754 if (m - l <= r - m) { 755 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 756 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 757 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 758 } else { 759 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 760 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 761 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 762 } 763 } 764 /* Update A with merge */ 765 stack[i - 2].size += stack[i - 1].size; 766 /* Push C up the stack */ 767 stack[i - 1].start = stack[i].start; 768 stack[i - 1].size = stack[i].size; 769 } else { 770 /* merge C into B */ 771 mergeBC: 772 /* If B[-1] <= C[0] then... you know the drill */ 773 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 774 m = stack[i].start; 775 /* Search B for C[0] insertion */ 776 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l)); 777 /* Search C for B[-1] insertion */ 778 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r)); 779 if (m - l <= r - m) { 780 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 781 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 782 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 783 } else { 784 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 785 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 786 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 787 } 788 } 789 /* Update B with merge */ 790 stack[i - 1].size += stack[i].size; 791 } 792 --i; 793 } else if (stack[i - 1].size <= stack[i].size) { 794 /* merge C into B */ 795 goto mergeBC; 796 } 797 } 798 if (itemp == i) break; 799 } 800 *stacksize = i; 801 PetscFunctionReturn(PETSC_SUCCESS); 802 } 803 804 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 805 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 806 binary insertion sort or regulat insertion sort */ 807 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 808 { 809 const PetscInt re = PetscMin(runstart + minrun, n - 1); 810 PetscInt ri = runstart; 811 812 PetscFunctionBegin; 813 if (PetscUnlikely(runstart == n - 1)) { 814 *runend = runstart; 815 PetscFunctionReturn(PETSC_SUCCESS); 816 } 817 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 818 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) { 819 ++ri; 820 while (ri < n - 1) { 821 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break; 822 ++ri; 823 } 824 { 825 PetscInt lo = runstart, hi = ri; 826 for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size); 827 } 828 } else { 829 ++ri; 830 while (ri < n - 1) { 831 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break; 832 ++ri; 833 } 834 } 835 if (ri < re) { 836 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 837 binary search */ 838 if (ri - runstart <= minrun >> 1) { 839 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 840 PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 841 } else { 842 PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 843 } 844 *runend = re; 845 } else *runend = ri; 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 850 { 851 const PetscInt re = PetscMin(runstart + minrun, n - 1); 852 PetscInt ri = runstart; 853 854 PetscFunctionBegin; 855 if (PetscUnlikely(runstart == n - 1)) { 856 *runend = runstart; 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 860 if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) { 861 ++ri; 862 while (ri < n - 1) { 863 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break; 864 ++ri; 865 } 866 { 867 PetscInt lo = runstart, hi = ri; 868 for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr); 869 } 870 } else { 871 ++ri; 872 while (ri < n - 1) { 873 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break; 874 ++ri; 875 } 876 } 877 if (ri < re) { 878 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 879 binary search */ 880 if (ri - runstart <= minrun >> 1) { 881 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 882 PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 883 } else { 884 PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 885 } 886 *runend = re; 887 } else *runend = ri; 888 PetscFunctionReturn(PETSC_SUCCESS); 889 } 890 891 /*@C 892 PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm. 893 894 Not Collective 895 896 Input Parameters: 897 + n - number of values 898 . arr - array to be sorted 899 . size - size in bytes of the datatype held in arr 900 . cmp - function pointer to comparison function 901 - ctx - optional context to be passed to comparison function, NULL if not needed 902 903 Output Parameters: 904 . arr - sorted array 905 906 Notes: 907 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 908 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 909 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 910 do so it repeatedly triggers attempts throughout to merge adjacent runs. 911 912 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 913 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 914 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 915 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 916 likely all contain similar data. 917 918 Sample usage: 919 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 920 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 921 may also 922 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 923 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 924 order 925 926 .vb 927 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 928 my_type l = *(my_type *) left, r = *(my_type *) right; 929 return (l < r) ? -1 : (l > r); 930 } 931 .ve 932 Note the context is unused here but you may use it to pass and subsequently access whatever information required 933 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 934 Then pass the function 935 .vb 936 PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx) 937 .ve 938 939 Fortran Notes: 940 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 941 returns result. For example 942 .vb 943 subroutine CompareIntegers(left,right,ctx,result) 944 implicit none 945 946 PetscInt,intent(in) :: left, right 947 type(UserCtx) :: ctx 948 integer,intent(out) :: result 949 950 if (left < right) then 951 result = -1 952 else if (left == right) then 953 result = 0 954 else 955 result = 1 956 end if 957 return 958 end subroutine CompareIntegers 959 .ve 960 961 References: 962 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 963 964 Level: developer 965 966 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()` 967 @*/ 968 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) 969 { 970 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 971 PetscTimSortStack runstack[128]; 972 PetscTimSortBuffer buff; 973 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 974 It is so unlikely that this limit is reached that this is __never__ checked for */ 975 976 PetscFunctionBegin; 977 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 978 is a power of 2 or one plus a power of 2 */ 979 { 980 PetscInt t = n, r = 0; 981 /* r becomes 1 if the least significant bits contain at least one off bit */ 982 while (t >= 64) { 983 r |= t & 1; 984 t >>= 1; 985 } 986 minrun = t + r; 987 } 988 if (PetscDefined(USE_DEBUG)) { 989 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 990 if (n < 64) { 991 PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n)); 992 } else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 993 } 994 PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr)); 995 buff.size = (size_t)minrun * size; 996 buff.maxsize = (size_t)n * size; 997 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 998 while (runstart < n) { 999 /* Check if additional entries are at least partially ordered and build natural run */ 1000 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend)); 1001 runstack[stacksize].start = runstart; 1002 runstack[stacksize].size = runend - runstart + 1; 1003 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize)); 1004 ++stacksize; 1005 runstart = runend + 1; 1006 } 1007 /* Have been inside while, so discard last stacksize++ */ 1008 --stacksize; 1009 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize)); 1010 PetscCall(PetscFree(buff.ptr)); 1011 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1012 PetscFunctionReturn(PETSC_SUCCESS); 1013 } 1014 1015 /*@C 1016 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and 1017 reorders a second array to match the first. The arrays need not be the same type. 1018 1019 Not Collective 1020 1021 Input Parameters: 1022 + n - number of values 1023 . asize - size in bytes of the datatype held in arr 1024 . bsize - size in bytes of the datatype held in barr 1025 . cmp - function pointer to comparison function 1026 - ctx - optional context to be passed to comparison function, NULL if not needed 1027 1028 Input/Output Parameters: 1029 + arr - array to be sorted, on output it is sorted 1030 - barr - array to be reordered, on output it is reordered 1031 1032 Notes: 1033 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1034 overlap. 1035 1036 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1037 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1038 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1039 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1040 1041 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1042 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1043 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1044 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1045 likely all contain similar data. 1046 1047 Sample usage: 1048 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 1049 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 1050 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1051 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1052 increasing order 1053 1054 .vb 1055 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 1056 my_type l = *(my_type *) left, r = *(my_type *) right; 1057 return (l < r) ? -1 : (l > r); 1058 } 1059 .ve 1060 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1061 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1062 Then pass the function 1063 .vb 1064 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx) 1065 .ve 1066 1067 Fortran Notes: 1068 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 1069 returns result. For example 1070 .vb 1071 subroutine CompareIntegers(left,right,ctx,result) 1072 implicit none 1073 1074 PetscInt,intent(in) :: left, right 1075 type(UserCtx) :: ctx 1076 integer,intent(out) :: result 1077 1078 if (left < right) then 1079 result = -1 1080 else if (left == right) then 1081 result = 0 1082 else 1083 result = 1 1084 end if 1085 return 1086 end subroutine CompareIntegers 1087 .ve 1088 1089 References: 1090 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 1091 1092 Level: developer 1093 1094 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()` 1095 @*/ 1096 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) 1097 { 1098 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1099 PetscTimSortStack runstack[128]; 1100 PetscTimSortBuffer abuff, bbuff; 1101 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1102 It is so unlikely that this limit is reached that this is __never__ checked for */ 1103 1104 PetscFunctionBegin; 1105 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1106 is a power of 2 or one plus a power of 2 */ 1107 { 1108 PetscInt t = n, r = 0; 1109 /* r becomes 1 if the least significant bits contain at least one off bit */ 1110 while (t >= 64) { 1111 r |= t & 1; 1112 t >>= 1; 1113 } 1114 minrun = t + r; 1115 } 1116 if (PetscDefined(USE_DEBUG)) { 1117 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 1118 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 1119 } 1120 PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr)); 1121 abuff.size = (size_t)minrun * asize; 1122 abuff.maxsize = (size_t)n * asize; 1123 PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr)); 1124 bbuff.size = (size_t)minrun * bsize; 1125 bbuff.maxsize = (size_t)n * bsize; 1126 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1127 while (runstart < n) { 1128 /* Check if additional entries are at least partially ordered and build natural run */ 1129 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend)); 1130 runstack[stacksize].start = runstart; 1131 runstack[stacksize].size = runend - runstart + 1; 1132 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize)); 1133 ++stacksize; 1134 runstart = runend + 1; 1135 } 1136 /* Have been inside while, so discard last stacksize++ */ 1137 --stacksize; 1138 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize)); 1139 PetscCall(PetscFree(abuff.ptr)); 1140 PetscCall(PetscFree(bbuff.ptr)); 1141 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 /*@ 1146 PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order. 1147 1148 Not Collective 1149 1150 Input Parameters: 1151 + n - number of values 1152 - arr - array of integers 1153 1154 Output Parameters: 1155 . arr - sorted array of integers 1156 1157 Notes: 1158 If the array is less than 64 entries long `PetscSortInt()` is automatically used. 1159 1160 This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is 1161 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1162 recommended that the user benchmark their code to see which routine is fastest. 1163 1164 Level: intermediate 1165 1166 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()` 1167 @*/ 1168 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) 1169 { 1170 PetscFunctionBegin; 1171 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1172 PetscValidIntPointer(arr, 2); 1173 if (n < 64) { 1174 PetscCall(PetscSortInt(n, arr)); 1175 } else { 1176 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1177 } 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 /*@ 1182 PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second 1183 `PetscInt` array to match the first. 1184 1185 Not Collective 1186 1187 Input Parameter: 1188 . n - number of values 1189 1190 Input/Output Parameters: 1191 + arr1 - array of integers to be sorted, modified on output 1192 - arr2 - array of integers to be reordered, modified on output 1193 1194 Notes: 1195 The arrays CANNOT overlap. 1196 1197 This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is 1198 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1199 recommended that the user benchmark their code to see which routine is fastest. 1200 1201 Level: intermediate 1202 1203 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()` 1204 @*/ 1205 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) 1206 { 1207 PetscFunctionBegin; 1208 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1209 PetscValidIntPointer(arr1, 2); 1210 PetscValidIntPointer(arr2, 3); 1211 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1212 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1213 PetscFunctionReturn(PETSC_SUCCESS); 1214 } 1215 1216 /*@ 1217 PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order. 1218 1219 Not Collective 1220 1221 Input Parameters: 1222 + n - number of values 1223 - arr - array of `PetscMPIInt` 1224 1225 Output Parameters: 1226 . arr - sorted array of integers 1227 1228 Notes: 1229 If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used. 1230 1231 This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is 1232 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1233 recommended that the user benchmark their code to see which routine is fastest. 1234 1235 Level: intermediate 1236 1237 .seealso: `PetscTimSort()`, `PetscSortMPIInt()` 1238 @*/ 1239 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) 1240 { 1241 PetscFunctionBegin; 1242 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1243 PetscValidIntPointer(arr, 2); 1244 if (n < 64) { 1245 PetscCall(PetscSortMPIInt(n, arr)); 1246 } else { 1247 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1248 } 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 /*@ 1253 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt` 1254 array to match the first. 1255 1256 Not Collective 1257 1258 Input Parameter: 1259 . n - number of values 1260 1261 Input/Output Parameters: 1262 . arr1 - array of integers to be sorted, modified on output 1263 - arr2 - array of integers to be reordered, modified on output 1264 1265 Notes: 1266 The arrays CANNOT overlap. 1267 1268 This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is 1269 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1270 recommended that the user benchmark their code to see which routine is fastest. 1271 1272 Level: intermediate 1273 1274 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()` 1275 @*/ 1276 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) 1277 { 1278 PetscFunctionBegin; 1279 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1280 PetscValidIntPointer(arr1, 2); 1281 PetscValidIntPointer(arr2, 3); 1282 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1283 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1284 PetscFunctionReturn(PETSC_SUCCESS); 1285 } 1286 1287 /*@ 1288 PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order. 1289 1290 Not Collective 1291 1292 Input Parameters: 1293 + n - number of values 1294 - arr - array of `PetscReal` 1295 1296 Output Parameters: 1297 . arr - sorted array of integers 1298 1299 Notes: 1300 If the array is less than 64 entries long `PetscSortReal()` is automatically used. 1301 1302 This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is 1303 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1304 recommended that the user benchmark their code to see which routine is fastest. 1305 1306 Level: intermediate 1307 1308 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()` 1309 @*/ 1310 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) 1311 { 1312 PetscFunctionBegin; 1313 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1314 PetscValidRealPointer(arr, 2); 1315 if (n < 64) { 1316 PetscCall(PetscSortReal(n, arr)); 1317 } else { 1318 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL)); 1319 } 1320 PetscFunctionReturn(PETSC_SUCCESS); 1321 } 1322 1323 /*@ 1324 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second 1325 array of `PetscInt` to match the first. 1326 1327 Not Collective 1328 1329 Input Parameter: 1330 . n - number of values 1331 1332 Input/Output Parameters: 1333 . arr1 - array of `PetscReal` to be sorted, modified on output 1334 - arr2 - array of `PetscInt` to be reordered, modified on output 1335 1336 Notes: 1337 This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is 1338 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1339 recommended that the user benchmark their code to see which routine is fastest. 1340 1341 Level: intermediate 1342 1343 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()` 1344 @*/ 1345 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) 1346 { 1347 PetscFunctionBegin; 1348 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1349 PetscValidRealPointer(arr1, 2); 1350 PetscValidIntPointer(arr2, 3); 1351 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1352 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL)); 1353 PetscFunctionReturn(PETSC_SUCCESS); 1354 } 1355