1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5 { 6 PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right; 7 return (l < r) ? -1 : (l > r); 8 } 9 10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11 { 12 PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right; 13 return (l < r) ? -1 : (l > r); 14 } 15 16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17 { 18 PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right; 19 return (l < r) ? -1 : (l > r); 20 } 21 22 #define MIN_GALLOP_CONST_GLOBAL 8 23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 24 typedef int (*CompFunc)(const void *, const void *, void *); 25 26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 27 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__) 28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29 { 30 __builtin_memcpy(t, b, size); 31 __builtin_memmove(b, a, size); 32 __builtin_memcpy(a, t, size); 33 return; 34 } 35 36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 37 { 38 __builtin_memcpy(t, ar, asize); 39 __builtin_memmove(ar, al, asize); 40 __builtin_memcpy(al, t, asize); 41 __builtin_memcpy(t, br, bsize); 42 __builtin_memmove(br, bl, bsize); 43 __builtin_memcpy(bl, t, bsize); 44 return; 45 } 46 47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 48 { 49 __builtin_memcpy(dest, src, size); 50 return; 51 } 52 53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54 { 55 __builtin_memcpy(adest, asrc, asize); 56 __builtin_memcpy(bdest, bsrc, bsize); 57 return; 58 } 59 60 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 61 { 62 __builtin_memmove(dest, src, size); 63 return; 64 } 65 66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67 { 68 __builtin_memmove(adest, asrc, asize); 69 __builtin_memmove(bdest, bsrc, bsize); 70 return; 71 } 72 #else 73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74 { 75 PetscFunctionBegin; 76 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size)); 77 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size)); 78 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size)); 79 PetscFunctionReturnVoid(); 80 } 81 82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 83 { 84 PetscFunctionBegin; 85 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize)); 86 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize)); 87 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize)); 88 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize)); 89 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize)); 90 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize)); 91 PetscFunctionReturnVoid(); 92 } 93 94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 95 { 96 PetscFunctionBegin; 97 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size)); 98 PetscFunctionReturnVoid(); 99 } 100 101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 102 { 103 PetscFunctionBegin; 104 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize)); 105 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize)); 106 PetscFunctionReturnVoid(); 107 } 108 109 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 110 { 111 PetscFunctionBegin; 112 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size)); 113 PetscFunctionReturnVoid(); 114 } 115 116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 117 { 118 PetscFunctionBegin; 119 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize)); 120 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize)); 121 PetscFunctionReturnVoid(); 122 } 123 #endif 124 125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 126 x. Output also inclusive. 127 128 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 129 130 {0,1,2,3,4,5} 131 132 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 133 */ 134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 135 { 136 PetscInt last = l, k = 1, mid, cur = l + 1; 137 138 PetscFunctionBegin; 139 *m = l; 140 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l); 141 if ((*cmp)(x, arr + r * size, ctx) >= 0) { 142 *m = r; 143 PetscFunctionReturn(PETSC_SUCCESS); 144 } 145 if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 146 while (PETSC_TRUE) { 147 if (PetscUnlikely(cur > r)) { 148 cur = r; 149 break; 150 } 151 if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break; 152 last = cur; 153 cur += (k <<= 1) + 1; 154 ++k; 155 } 156 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 157 while (cur > last + 1) { 158 mid = last + ((cur - last) >> 1); 159 if ((*cmp)(x, (arr) + mid * size, ctx) < 0) { 160 cur = mid; 161 } else { 162 last = mid; 163 } 164 } 165 *m = cur; 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 170 < x. Output also inclusive */ 171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 172 { 173 PetscInt last = r, k = 1, mid, cur = r - 1; 174 175 PetscFunctionBegin; 176 *m = r; 177 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l); 178 if ((*cmp)(x, arr + l * size, ctx) <= 0) { 179 *m = l; 180 PetscFunctionReturn(PETSC_SUCCESS); 181 } 182 if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 183 while (PETSC_TRUE) { 184 if (PetscUnlikely(cur < l)) { 185 cur = l; 186 break; 187 } 188 if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break; 189 last = cur; 190 cur -= (k <<= 1) + 1; 191 ++k; 192 } 193 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 194 while (last > cur + 1) { 195 mid = last - ((last - cur) >> 1); 196 if ((*cmp)(x, (arr) + mid * size, ctx) > 0) { 197 cur = mid; 198 } else { 199 last = mid; 200 } 201 } 202 *m = cur; 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 207 complete array, left is first index of left array, mid is first index of right array, right is last index of right 208 array */ 209 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 210 { 211 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 212 const PetscInt llen = mid - left; 213 214 PetscFunctionBegin; 215 Petsc_memcpy(tarr, arr + (left * size), llen * size); 216 while ((i < llen) && (j <= right)) { 217 if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) { 218 Petsc_memcpy(arr + (k * size), tarr + (i * size), size); 219 ++k; 220 gallopright = 0; 221 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 222 PetscInt l1, l2, diff1, diff2; 223 do { 224 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 225 /* search temp for right[j], can move up to that of temp into arr immediately */ 226 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 227 diff1 = l1 - i; 228 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 229 k += diff1; 230 i = l1; 231 /* search right for temp[i], can move up to that many of right into arr */ 232 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 233 diff2 = l2 - j; 234 Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size); 235 k += diff2; 236 j = l2; 237 if (i >= llen || j > right) break; 238 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 239 ++MIN_GALLOP_GLOBAL; 240 } 241 } else { 242 Petsc_memmove(arr + (k * size), arr + (j * size), size); 243 ++k; 244 gallopleft = 0; 245 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 246 PetscInt l1, l2, diff1, diff2; 247 do { 248 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 249 /* search right for temp[i], can move up to that many of right into arr */ 250 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 251 diff2 = l2 - j; 252 Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size); 253 k += diff2; 254 j = l2; 255 /* search temp for right[j], can copy up to that of temp into arr immediately */ 256 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 257 diff1 = l1 - i; 258 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 259 k += diff1; 260 i = l1; 261 if (i >= llen || j > right) break; 262 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 263 ++MIN_GALLOP_GLOBAL; 264 } 265 } 266 } 267 if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 272 { 273 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 274 const PetscInt llen = mid - left; 275 276 PetscFunctionBegin; 277 Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize); 278 while ((i < llen) && (j <= right)) { 279 if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) { 280 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize); 281 ++k; 282 gallopright = 0; 283 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 284 PetscInt l1, l2, diff1, diff2; 285 do { 286 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 287 /* search temp for right[j], can move up to that of temp into arr immediately */ 288 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 289 diff1 = l1 - i; 290 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 291 k += diff1; 292 i = l1; 293 /* search right for temp[i], can move up to that many of right into arr */ 294 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 295 diff2 = l2 - j; 296 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 297 k += diff2; 298 j = l2; 299 if (i >= llen || j > right) break; 300 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 301 ++MIN_GALLOP_GLOBAL; 302 } 303 } else { 304 Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize); 305 ++k; 306 gallopleft = 0; 307 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 308 PetscInt l1, l2, diff1, diff2; 309 do { 310 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 311 /* search right for temp[i], can move up to that many of right into arr */ 312 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 313 diff2 = l2 - j; 314 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 315 k += diff2; 316 j = l2; 317 /* search temp for right[j], can copy up to that of temp into arr immediately */ 318 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 319 diff1 = l1 - i; 320 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 321 k += diff1; 322 i = l1; 323 if (i >= llen || j > right) break; 324 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 325 ++MIN_GALLOP_GLOBAL; 326 } 327 } 328 } 329 if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize); 330 PetscFunctionReturn(PETSC_SUCCESS); 331 } 332 333 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 334 complete array, left is first index of left array, mid is first index of right array, right is last index of right 335 array */ 336 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 337 { 338 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 339 const PetscInt rlen = right - mid + 1; 340 341 PetscFunctionBegin; 342 Petsc_memcpy(tarr, (arr) + mid * size, rlen * size); 343 while ((i >= 0) && (j >= left)) { 344 if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) { 345 Petsc_memcpy((arr) + k * size, (tarr) + i * size, size); 346 --k; 347 gallopleft = 0; 348 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 349 PetscInt l1, l2, diff1, diff2; 350 do { 351 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 352 /* search temp for left[j], can copy up to that many of temp into arr */ 353 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 354 diff1 = i - l1; 355 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 356 k -= diff1; 357 i = l1; 358 /* search left for temp[i], can move up to that many of left up arr */ 359 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 360 diff2 = j - l2; 361 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 362 k -= diff2; 363 j = l2; 364 if (i < 0 || j < left) break; 365 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 366 ++MIN_GALLOP_GLOBAL; 367 } 368 } else { 369 Petsc_memmove((arr) + k * size, (arr) + j * size, size); 370 --k; 371 gallopright = 0; 372 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 373 PetscInt l1, l2, diff1, diff2; 374 do { 375 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 376 /* search left for temp[i], can move up to that many of left up arr */ 377 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 378 diff2 = j - l2; 379 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 380 k -= diff2; 381 j = l2; 382 /* search temp for left[j], can copy up to that many of temp into arr */ 383 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 384 diff1 = i - l1; 385 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 386 k -= diff1; 387 i = l1; 388 if (i < 0 || j < left) break; 389 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 390 ++MIN_GALLOP_GLOBAL; 391 } 392 } 393 } 394 if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 399 { 400 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 401 const PetscInt rlen = right - mid + 1; 402 403 PetscFunctionBegin; 404 Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize); 405 while ((i >= 0) && (j >= left)) { 406 if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) { 407 Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize); 408 --k; 409 gallopleft = 0; 410 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 411 PetscInt l1, l2, diff1, diff2; 412 do { 413 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 414 /* search temp for left[j], can copy up to that many of temp into arr */ 415 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 416 diff1 = i - l1; 417 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 418 k -= diff1; 419 i = l1; 420 /* search left for temp[i], can move up to that many of left up arr */ 421 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 422 diff2 = j - l2; 423 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 424 k -= diff2; 425 j = l2; 426 if (i < 0 || j < left) break; 427 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 428 ++MIN_GALLOP_GLOBAL; 429 } 430 } else { 431 Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize); 432 --k; 433 gallopright = 0; 434 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 435 PetscInt l1, l2, diff1, diff2; 436 do { 437 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 438 /* search left for temp[i], can move up to that many of left up arr */ 439 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 440 diff2 = j - l2; 441 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 442 k -= diff2; 443 j = l2; 444 /* search temp for left[j], can copy up to that many of temp into arr */ 445 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 446 diff1 = i - l1; 447 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 448 k -= diff1; 449 i = l1; 450 if (i < 0 || j < left) break; 451 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 452 ++MIN_GALLOP_GLOBAL; 453 } 454 } 455 } 456 if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 461 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 462 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 463 { 464 PetscInt i = start == left ? start + 1 : start; 465 466 PetscFunctionBegin; 467 for (; i <= right; ++i) { 468 PetscInt j = i - 1; 469 Petsc_memcpy(tarr, arr + (i * size), size); 470 while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) { 471 COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size); 472 --j; 473 } 474 Petsc_memcpy((arr) + (j + 1) * size, tarr, size); 475 } 476 PetscFunctionReturn(PETSC_SUCCESS); 477 } 478 479 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 480 { 481 PetscInt i = start == left ? start + 1 : start; 482 483 PetscFunctionBegin; 484 for (; i <= right; ++i) { 485 PetscInt j = i - 1; 486 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 487 while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) { 488 COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize); 489 --j; 490 } 491 Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize); 492 } 493 PetscFunctionReturn(PETSC_SUCCESS); 494 } 495 496 /* See PetscInsertionSort_Private */ 497 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 498 { 499 PetscInt i = start == left ? start + 1 : start; 500 501 PetscFunctionBegin; 502 for (; i <= right; ++i) { 503 PetscInt l = left, r = i; 504 Petsc_memcpy(tarr, arr + (i * size), size); 505 do { 506 const PetscInt m = l + ((r - l) >> 1); 507 if ((*cmp)(tarr, arr + (m * size), ctx) < 0) { 508 r = m; 509 } else { 510 l = m + 1; 511 } 512 } while (l < r); 513 Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size); 514 Petsc_memcpy(arr + (l * size), tarr, size); 515 } 516 PetscFunctionReturn(PETSC_SUCCESS); 517 } 518 519 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 520 { 521 PetscInt i = start == left ? start + 1 : start; 522 523 PetscFunctionBegin; 524 for (; i <= right; ++i) { 525 PetscInt l = left, r = i; 526 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 527 do { 528 const PetscInt m = l + ((r - l) >> 1); 529 if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) { 530 r = m; 531 } else { 532 l = m + 1; 533 } 534 } while (l < r); 535 Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize); 536 Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize); 537 } 538 PetscFunctionReturn(PETSC_SUCCESS); 539 } 540 541 typedef struct { 542 PetscInt size; 543 PetscInt start; 544 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 545 } PetscTimSortStack; 546 #else 547 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt)); 548 #endif 549 550 typedef struct { 551 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 552 size_t size; 553 size_t maxsize; 554 } PetscTimSortBuffer; 555 556 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) 557 { 558 PetscFunctionBegin; 559 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS); 560 { 561 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 562 size_t newMax = PetscMin(newSize * newSize, buff->maxsize); 563 PetscCall(PetscFree(buff->ptr)); 564 PetscCall(PetscMalloc1(newMax, &buff->ptr)); 565 buff->size = newMax; 566 } 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) 571 { 572 PetscFunctionBegin; 573 for (; stacksize; --stacksize) { 574 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 575 if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) { 576 PetscInt l, m = stack[stacksize].start, r; 577 /* Search A for B[0] insertion */ 578 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l)); 579 /* Search B for A[-1] insertion */ 580 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r)); 581 if (m - l <= r - m) { 582 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 583 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 584 } else { 585 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 586 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 587 } 588 } 589 /* Update A with merge */ 590 stack[stacksize - 1].size += stack[stacksize].size; 591 } 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) 596 { 597 PetscFunctionBegin; 598 for (; stacksize; --stacksize) { 599 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 600 if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) { 601 PetscInt l, m = stack[stacksize].start, r; 602 /* Search A for B[0] insertion */ 603 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l)); 604 /* Search B for A[-1] insertion */ 605 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r)); 606 if (m - l <= r - m) { 607 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 608 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 609 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 610 } else { 611 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 612 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 613 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 614 } 615 } 616 /* Update A with merge */ 617 stack[stacksize - 1].size += stack[stacksize].size; 618 } 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 622 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) 623 { 624 PetscInt i = *stacksize; 625 626 PetscFunctionBegin; 627 while (i) { 628 PetscInt l, m, r, itemp = i; 629 630 if (i == 1) { 631 /* A = stack[i-1], B = stack[i] */ 632 if (stack[i - 1].size < stack[i].size) { 633 /* if A[-1] <= B[0] then sorted */ 634 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 635 m = stack[i].start; 636 /* Search A for B[0] insertion */ 637 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l)); 638 /* Search B for A[-1] insertion */ 639 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r)); 640 if (m - l <= r - m) { 641 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 642 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 643 } else { 644 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 645 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 646 } 647 } 648 /* Update A with merge */ 649 stack[i - 1].size += stack[i].size; 650 --i; 651 } 652 } else { 653 /* i > 2, i.e. C exists 654 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 655 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 656 if (stack[i - 2].size < stack[i].size) { 657 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 658 if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) { 659 m = stack[i - 1].start; 660 /* Search A for B[0] insertion */ 661 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l)); 662 /* Search B for A[-1] insertion */ 663 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r)); 664 if (m - l <= r - m) { 665 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 666 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 667 } else { 668 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 669 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 670 } 671 } 672 /* Update A with merge */ 673 stack[i - 2].size += stack[i - 1].size; 674 /* Push C up the stack */ 675 stack[i - 1].start = stack[i].start; 676 stack[i - 1].size = stack[i].size; 677 } else { 678 /* merge C into B */ 679 mergeBC: 680 /* If B[-1] <= C[0] then... you know the drill */ 681 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 682 m = stack[i].start; 683 /* Search B for C[0] insertion */ 684 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l)); 685 /* Search C for B[-1] insertion */ 686 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r)); 687 if (m - l <= r - m) { 688 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 689 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 690 } else { 691 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 692 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 693 } 694 } 695 /* Update B with merge */ 696 stack[i - 1].size += stack[i].size; 697 } 698 --i; 699 } else if (stack[i - 1].size <= stack[i].size) { 700 /* merge C into B */ 701 goto mergeBC; 702 } 703 } 704 if (itemp == i) break; 705 } 706 *stacksize = i; 707 PetscFunctionReturn(PETSC_SUCCESS); 708 } 709 710 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) 711 { 712 PetscInt i = *stacksize; 713 714 PetscFunctionBegin; 715 while (i) { 716 PetscInt l, m, r, itemp = i; 717 718 if (i == 1) { 719 /* A = stack[i-1], B = stack[i] */ 720 if (stack[i - 1].size < stack[i].size) { 721 /* if A[-1] <= B[0] then sorted */ 722 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 723 m = stack[i].start; 724 /* Search A for B[0] insertion */ 725 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l)); 726 /* Search B for A[-1] insertion */ 727 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r)); 728 if (m - l <= r - m) { 729 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 730 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 731 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 732 } else { 733 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 734 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 735 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 736 } 737 } 738 /* Update A with merge */ 739 stack[i - 1].size += stack[i].size; 740 --i; 741 } 742 } else { 743 /* i > 2, i.e. C exists 744 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 745 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 746 if (stack[i - 2].size < stack[i].size) { 747 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 748 if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) { 749 m = stack[i - 1].start; 750 /* Search A for B[0] insertion */ 751 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l)); 752 /* Search B for A[-1] insertion */ 753 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r)); 754 if (m - l <= r - m) { 755 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 756 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 757 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 758 } else { 759 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 760 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 761 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 762 } 763 } 764 /* Update A with merge */ 765 stack[i - 2].size += stack[i - 1].size; 766 /* Push C up the stack */ 767 stack[i - 1].start = stack[i].start; 768 stack[i - 1].size = stack[i].size; 769 } else { 770 /* merge C into B */ 771 mergeBC: 772 /* If B[-1] <= C[0] then... you know the drill */ 773 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 774 m = stack[i].start; 775 /* Search B for C[0] insertion */ 776 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l)); 777 /* Search C for B[-1] insertion */ 778 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r)); 779 if (m - l <= r - m) { 780 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 781 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 782 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 783 } else { 784 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 785 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 786 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 787 } 788 } 789 /* Update B with merge */ 790 stack[i - 1].size += stack[i].size; 791 } 792 --i; 793 } else if (stack[i - 1].size <= stack[i].size) { 794 /* merge C into B */ 795 goto mergeBC; 796 } 797 } 798 if (itemp == i) break; 799 } 800 *stacksize = i; 801 PetscFunctionReturn(PETSC_SUCCESS); 802 } 803 804 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 805 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 806 binary insertion sort or regulat insertion sort */ 807 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 808 { 809 const PetscInt re = PetscMin(runstart + minrun, n - 1); 810 PetscInt ri = runstart; 811 812 PetscFunctionBegin; 813 if (PetscUnlikely(runstart == n - 1)) { 814 *runend = runstart; 815 PetscFunctionReturn(PETSC_SUCCESS); 816 } 817 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 818 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) { 819 ++ri; 820 while (ri < n - 1) { 821 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break; 822 ++ri; 823 } 824 { 825 PetscInt lo = runstart, hi = ri; 826 for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size); 827 } 828 } else { 829 ++ri; 830 while (ri < n - 1) { 831 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break; 832 ++ri; 833 } 834 } 835 if (ri < re) { 836 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 837 binary search */ 838 if (ri - runstart <= minrun >> 1) { 839 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 840 PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 841 } else { 842 PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 843 } 844 *runend = re; 845 } else *runend = ri; 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 850 { 851 const PetscInt re = PetscMin(runstart + minrun, n - 1); 852 PetscInt ri = runstart; 853 854 PetscFunctionBegin; 855 if (PetscUnlikely(runstart == n - 1)) { 856 *runend = runstart; 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 860 if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) { 861 ++ri; 862 while (ri < n - 1) { 863 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break; 864 ++ri; 865 } 866 { 867 PetscInt lo = runstart, hi = ri; 868 for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr); 869 } 870 } else { 871 ++ri; 872 while (ri < n - 1) { 873 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break; 874 ++ri; 875 } 876 } 877 if (ri < re) { 878 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 879 binary search */ 880 if (ri - runstart <= minrun >> 1) { 881 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 882 PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 883 } else { 884 PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 885 } 886 *runend = re; 887 } else *runend = ri; 888 PetscFunctionReturn(PETSC_SUCCESS); 889 } 890 891 /*@C 892 PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm. 893 894 Not Collective 895 896 Input Parameters: 897 + n - number of values 898 . arr - array to be sorted 899 . size - size in bytes of the datatype held in arr 900 . cmp - function pointer to comparison function 901 - ctx - optional context to be passed to comparison function, NULL if not needed 902 903 Output Parameter: 904 . arr - sorted array 905 906 Notes: 907 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 908 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 909 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 910 do so it repeatedly triggers attempts throughout to merge adjacent runs. 911 912 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 913 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 914 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 915 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 916 likely all contain similar data. 917 918 Example Usage: 919 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 920 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 921 may also 922 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 923 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 924 order 925 926 .vb 927 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 928 my_type l = *(my_type *) left, r = *(my_type *) right; 929 return (l < r) ? -1 : (l > r); 930 } 931 .ve 932 933 Note the context is unused here but you may use it to pass and subsequently access whatever information required 934 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 935 Then pass the function 936 .vb 937 PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx) 938 .ve 939 940 Fortran Notes: 941 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 942 returns result. For example 943 .vb 944 subroutine CompareIntegers(left,right,ctx,result) 945 implicit none 946 947 PetscInt,intent(in) :: left, right 948 type(UserCtx) :: ctx 949 integer,intent(out) :: result 950 951 if (left < right) then 952 result = -1 953 else if (left == right) then 954 result = 0 955 else 956 result = 1 957 end if 958 return 959 end subroutine CompareIntegers 960 .ve 961 962 References: 963 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 964 965 Level: developer 966 967 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()` 968 @*/ 969 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) 970 { 971 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 972 PetscTimSortStack runstack[128]; 973 PetscTimSortBuffer buff; 974 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 975 It is so unlikely that this limit is reached that this is __never__ checked for */ 976 977 PetscFunctionBegin; 978 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 979 is a power of 2 or one plus a power of 2 */ 980 { 981 PetscInt t = n, r = 0; 982 /* r becomes 1 if the least significant bits contain at least one off bit */ 983 while (t >= 64) { 984 r |= t & 1; 985 t >>= 1; 986 } 987 minrun = t + r; 988 } 989 if (PetscDefined(USE_DEBUG)) { 990 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 991 if (n < 64) { 992 PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n)); 993 } else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 994 } 995 PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr)); 996 buff.size = (size_t)minrun * size; 997 buff.maxsize = (size_t)n * size; 998 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 999 while (runstart < n) { 1000 /* Check if additional entries are at least partially ordered and build natural run */ 1001 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend)); 1002 runstack[stacksize].start = runstart; 1003 runstack[stacksize].size = runend - runstart + 1; 1004 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize)); 1005 ++stacksize; 1006 runstart = runend + 1; 1007 } 1008 /* Have been inside while, so discard last stacksize++ */ 1009 --stacksize; 1010 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize)); 1011 PetscCall(PetscFree(buff.ptr)); 1012 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 /*@C 1017 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and 1018 reorders a second array to match the first. The arrays need not be the same type. 1019 1020 Not Collective 1021 1022 Input Parameters: 1023 + n - number of values 1024 . asize - size in bytes of the datatype held in arr 1025 . bsize - size in bytes of the datatype held in barr 1026 . cmp - function pointer to comparison function 1027 - ctx - optional context to be passed to comparison function, NULL if not needed 1028 1029 Input/Output Parameters: 1030 + arr - array to be sorted, on output it is sorted 1031 - barr - array to be reordered, on output it is reordered 1032 1033 Notes: 1034 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1035 overlap. 1036 1037 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1038 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1039 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1040 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1041 1042 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1043 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1044 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1045 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1046 likely all contain similar data. 1047 1048 Example Usage: 1049 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 1050 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 1051 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1052 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1053 increasing order 1054 1055 .vb 1056 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 1057 my_type l = *(my_type *) left, r = *(my_type *) right; 1058 return (l < r) ? -1 : (l > r); 1059 } 1060 .ve 1061 1062 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1063 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1064 Then pass the function 1065 .vb 1066 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx) 1067 .ve 1068 1069 Fortran Notes: 1070 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 1071 returns result. For example 1072 .vb 1073 subroutine CompareIntegers(left,right,ctx,result) 1074 implicit none 1075 1076 PetscInt,intent(in) :: left, right 1077 type(UserCtx) :: ctx 1078 integer,intent(out) :: result 1079 1080 if (left < right) then 1081 result = -1 1082 else if (left == right) then 1083 result = 0 1084 else 1085 result = 1 1086 end if 1087 return 1088 end subroutine CompareIntegers 1089 .ve 1090 1091 References: 1092 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 1093 1094 Level: developer 1095 1096 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()` 1097 @*/ 1098 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) 1099 { 1100 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1101 PetscTimSortStack runstack[128]; 1102 PetscTimSortBuffer abuff, bbuff; 1103 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1104 It is so unlikely that this limit is reached that this is __never__ checked for */ 1105 1106 PetscFunctionBegin; 1107 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1108 is a power of 2 or one plus a power of 2 */ 1109 { 1110 PetscInt t = n, r = 0; 1111 /* r becomes 1 if the least significant bits contain at least one off bit */ 1112 while (t >= 64) { 1113 r |= t & 1; 1114 t >>= 1; 1115 } 1116 minrun = t + r; 1117 } 1118 if (PetscDefined(USE_DEBUG)) { 1119 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 1120 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 1121 } 1122 PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr)); 1123 abuff.size = (size_t)minrun * asize; 1124 abuff.maxsize = (size_t)n * asize; 1125 PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr)); 1126 bbuff.size = (size_t)minrun * bsize; 1127 bbuff.maxsize = (size_t)n * bsize; 1128 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1129 while (runstart < n) { 1130 /* Check if additional entries are at least partially ordered and build natural run */ 1131 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend)); 1132 runstack[stacksize].start = runstart; 1133 runstack[stacksize].size = runend - runstart + 1; 1134 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize)); 1135 ++stacksize; 1136 runstart = runend + 1; 1137 } 1138 /* Have been inside while, so discard last stacksize++ */ 1139 --stacksize; 1140 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize)); 1141 PetscCall(PetscFree(abuff.ptr)); 1142 PetscCall(PetscFree(bbuff.ptr)); 1143 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1144 PetscFunctionReturn(PETSC_SUCCESS); 1145 } 1146 1147 /*@ 1148 PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order. 1149 1150 Not Collective 1151 1152 Input Parameters: 1153 + n - number of values 1154 - arr - array of integers 1155 1156 Output Parameter: 1157 . arr - sorted array of integers 1158 1159 Notes: 1160 If the array is less than 64 entries long `PetscSortInt()` is automatically used. 1161 1162 This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is 1163 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1164 recommended that the user benchmark their code to see which routine is fastest. 1165 1166 Level: intermediate 1167 1168 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()` 1169 @*/ 1170 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) 1171 { 1172 PetscFunctionBegin; 1173 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1174 PetscValidIntPointer(arr, 2); 1175 if (n < 64) { 1176 PetscCall(PetscSortInt(n, arr)); 1177 } else { 1178 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1179 } 1180 PetscFunctionReturn(PETSC_SUCCESS); 1181 } 1182 1183 /*@ 1184 PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second 1185 `PetscInt` array to match the first. 1186 1187 Not Collective 1188 1189 Input Parameter: 1190 . n - number of values 1191 1192 Input/Output Parameters: 1193 + arr1 - array of integers to be sorted, modified on output 1194 - arr2 - array of integers to be reordered, modified on output 1195 1196 Notes: 1197 The arrays CANNOT overlap. 1198 1199 This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is 1200 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1201 recommended that the user benchmark their code to see which routine is fastest. 1202 1203 Level: intermediate 1204 1205 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()` 1206 @*/ 1207 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) 1208 { 1209 PetscFunctionBegin; 1210 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1211 PetscValidIntPointer(arr1, 2); 1212 PetscValidIntPointer(arr2, 3); 1213 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1214 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1215 PetscFunctionReturn(PETSC_SUCCESS); 1216 } 1217 1218 /*@ 1219 PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order. 1220 1221 Not Collective 1222 1223 Input Parameters: 1224 + n - number of values 1225 - arr - array of `PetscMPIInt` 1226 1227 Output Parameter: 1228 . arr - sorted array of integers 1229 1230 Notes: 1231 If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used. 1232 1233 This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is 1234 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1235 recommended that the user benchmark their code to see which routine is fastest. 1236 1237 Level: intermediate 1238 1239 .seealso: `PetscTimSort()`, `PetscSortMPIInt()` 1240 @*/ 1241 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) 1242 { 1243 PetscFunctionBegin; 1244 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1245 PetscValidIntPointer(arr, 2); 1246 if (n < 64) { 1247 PetscCall(PetscSortMPIInt(n, arr)); 1248 } else { 1249 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1250 } 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 /*@ 1255 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt` 1256 array to match the first. 1257 1258 Not Collective 1259 1260 Input Parameter: 1261 . n - number of values 1262 1263 Input/Output Parameters: 1264 + arr1 - array of integers to be sorted, modified on output 1265 - arr2 - array of integers to be reordered, modified on output 1266 1267 Notes: 1268 The arrays CANNOT overlap. 1269 1270 This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is 1271 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1272 recommended that the user benchmark their code to see which routine is fastest. 1273 1274 Level: intermediate 1275 1276 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()` 1277 @*/ 1278 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) 1279 { 1280 PetscFunctionBegin; 1281 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1282 PetscValidIntPointer(arr1, 2); 1283 PetscValidIntPointer(arr2, 3); 1284 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1285 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1286 PetscFunctionReturn(PETSC_SUCCESS); 1287 } 1288 1289 /*@ 1290 PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order. 1291 1292 Not Collective 1293 1294 Input Parameters: 1295 + n - number of values 1296 - arr - array of `PetscReal` 1297 1298 Output Parameter: 1299 . arr - sorted array of integers 1300 1301 Notes: 1302 If the array is less than 64 entries long `PetscSortReal()` is automatically used. 1303 1304 This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is 1305 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1306 recommended that the user benchmark their code to see which routine is fastest. 1307 1308 Level: intermediate 1309 1310 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()` 1311 @*/ 1312 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) 1313 { 1314 PetscFunctionBegin; 1315 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1316 PetscValidRealPointer(arr, 2); 1317 if (n < 64) { 1318 PetscCall(PetscSortReal(n, arr)); 1319 } else { 1320 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL)); 1321 } 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } 1324 1325 /*@ 1326 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second 1327 array of `PetscInt` to match the first. 1328 1329 Not Collective 1330 1331 Input Parameter: 1332 . n - number of values 1333 1334 Input/Output Parameters: 1335 + arr1 - array of `PetscReal` to be sorted, modified on output 1336 - arr2 - array of `PetscInt` to be reordered, modified on output 1337 1338 Notes: 1339 This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is 1340 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1341 recommended that the user benchmark their code to see which routine is fastest. 1342 1343 Level: intermediate 1344 1345 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()` 1346 @*/ 1347 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) 1348 { 1349 PetscFunctionBegin; 1350 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1351 PetscValidRealPointer(arr1, 2); 1352 PetscValidIntPointer(arr2, 3); 1353 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1354 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL)); 1355 PetscFunctionReturn(PETSC_SUCCESS); 1356 } 1357