1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5 { 6 PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right; 7 return (l < r) ? -1 : (l > r); 8 } 9 10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11 { 12 PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right; 13 return (l < r) ? -1 : (l > r); 14 } 15 16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17 { 18 PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right; 19 return (l < r) ? -1 : (l > r); 20 } 21 22 #define MIN_GALLOP_CONST_GLOBAL 8 23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 24 typedef int (*CompFunc)(const void *, const void *, void *); 25 26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 27 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__) 28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29 { 30 __builtin_memcpy(t, b, size); 31 __builtin_memmove(b, a, size); 32 __builtin_memcpy(a, t, size); 33 return; 34 } 35 36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 37 { 38 __builtin_memcpy(t, ar, asize); 39 __builtin_memmove(ar, al, asize); 40 __builtin_memcpy(al, t, asize); 41 __builtin_memcpy(t, br, bsize); 42 __builtin_memmove(br, bl, bsize); 43 __builtin_memcpy(bl, t, bsize); 44 return; 45 } 46 47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 48 { 49 __builtin_memcpy(dest, src, size); 50 return; 51 } 52 53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54 { 55 __builtin_memcpy(adest, asrc, asize); 56 __builtin_memcpy(bdest, bsrc, bsize); 57 return; 58 } 59 60 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 61 { 62 __builtin_memmove(dest, src, size); 63 return; 64 } 65 66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67 { 68 __builtin_memmove(adest, asrc, asize); 69 __builtin_memmove(bdest, bsrc, bsize); 70 return; 71 } 72 #else 73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74 { 75 PetscFunctionBegin; 76 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size)); 77 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size)); 78 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size)); 79 PetscFunctionReturnVoid(); 80 } 81 82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 83 { 84 PetscFunctionBegin; 85 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize)); 86 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize)); 87 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize)); 88 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize)); 89 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize)); 90 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize)); 91 PetscFunctionReturnVoid(); 92 } 93 94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 95 { 96 PetscFunctionBegin; 97 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size)); 98 PetscFunctionReturnVoid(); 99 } 100 101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 102 { 103 PetscFunctionBegin; 104 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize)); 105 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize)); 106 PetscFunctionReturnVoid(); 107 } 108 109 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 110 { 111 PetscFunctionBegin; 112 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size)); 113 PetscFunctionReturnVoid(); 114 } 115 116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 117 { 118 PetscFunctionBegin; 119 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize)); 120 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize)); 121 PetscFunctionReturnVoid(); 122 } 123 #endif 124 125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 126 x. Output also inclusive. 127 128 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 129 130 {0,1,2,3,4,5} 131 132 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 133 */ 134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 135 { 136 PetscInt last = l, k = 1, mid, cur = l + 1; 137 138 PetscFunctionBegin; 139 *m = l; 140 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l); 141 if ((*cmp)(x, arr + r * size, ctx) >= 0) { 142 *m = r; 143 PetscFunctionReturn(PETSC_SUCCESS); 144 } 145 if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 146 while (PETSC_TRUE) { 147 if (PetscUnlikely(cur > r)) { 148 cur = r; 149 break; 150 } 151 if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break; 152 last = cur; 153 cur += (k <<= 1) + 1; 154 ++k; 155 } 156 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 157 while (cur > last + 1) { 158 mid = last + ((cur - last) >> 1); 159 if ((*cmp)(x, (arr) + mid * size, ctx) < 0) { 160 cur = mid; 161 } else { 162 last = mid; 163 } 164 } 165 *m = cur; 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 170 < x. Output also inclusive */ 171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 172 { 173 PetscInt last = r, k = 1, mid, cur = r - 1; 174 175 PetscFunctionBegin; 176 *m = r; 177 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l); 178 if ((*cmp)(x, arr + l * size, ctx) <= 0) { 179 *m = l; 180 PetscFunctionReturn(PETSC_SUCCESS); 181 } 182 if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 183 while (PETSC_TRUE) { 184 if (PetscUnlikely(cur < l)) { 185 cur = l; 186 break; 187 } 188 if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break; 189 last = cur; 190 cur -= (k <<= 1) + 1; 191 ++k; 192 } 193 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 194 while (last > cur + 1) { 195 mid = last - ((last - cur) >> 1); 196 if ((*cmp)(x, (arr) + mid * size, ctx) > 0) { 197 cur = mid; 198 } else { 199 last = mid; 200 } 201 } 202 *m = cur; 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 207 complete array, left is first index of left array, mid is first index of right array, right is last index of right 208 array */ 209 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 210 { 211 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 212 const PetscInt llen = mid - left; 213 214 PetscFunctionBegin; 215 Petsc_memcpy(tarr, arr + (left * size), llen * size); 216 while ((i < llen) && (j <= right)) { 217 if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) { 218 Petsc_memcpy(arr + (k * size), tarr + (i * size), size); 219 ++k; 220 gallopright = 0; 221 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 222 PetscInt l1, l2, diff1, diff2; 223 do { 224 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 225 /* search temp for right[j], can move up to that of temp into arr immediately */ 226 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 227 diff1 = l1 - i; 228 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 229 k += diff1; 230 i = l1; 231 /* search right for temp[i], can move up to that many of right into arr */ 232 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 233 diff2 = l2 - j; 234 Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size); 235 k += diff2; 236 j = l2; 237 if (i >= llen || j > right) break; 238 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 239 ++MIN_GALLOP_GLOBAL; 240 } 241 } else { 242 Petsc_memmove(arr + (k * size), arr + (j * size), size); 243 ++k; 244 gallopleft = 0; 245 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 246 PetscInt l1, l2, diff1, diff2; 247 do { 248 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 249 /* search right for temp[i], can move up to that many of right into arr */ 250 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 251 diff2 = l2 - j; 252 Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size); 253 k += diff2; 254 j = l2; 255 /* search temp for right[j], can copy up to that of temp into arr immediately */ 256 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 257 diff1 = l1 - i; 258 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 259 k += diff1; 260 i = l1; 261 if (i >= llen || j > right) break; 262 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 263 ++MIN_GALLOP_GLOBAL; 264 } 265 } 266 } 267 if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 272 { 273 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 274 const PetscInt llen = mid - left; 275 276 PetscFunctionBegin; 277 Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize); 278 while ((i < llen) && (j <= right)) { 279 if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) { 280 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize); 281 ++k; 282 gallopright = 0; 283 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 284 PetscInt l1, l2, diff1, diff2; 285 do { 286 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 287 /* search temp for right[j], can move up to that of temp into arr immediately */ 288 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 289 diff1 = l1 - i; 290 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 291 k += diff1; 292 i = l1; 293 /* search right for temp[i], can move up to that many of right into arr */ 294 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 295 diff2 = l2 - j; 296 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 297 k += diff2; 298 j = l2; 299 if (i >= llen || j > right) break; 300 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 301 ++MIN_GALLOP_GLOBAL; 302 } 303 } else { 304 Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize); 305 ++k; 306 gallopleft = 0; 307 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 308 PetscInt l1, l2, diff1, diff2; 309 do { 310 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 311 /* search right for temp[i], can move up to that many of right into arr */ 312 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 313 diff2 = l2 - j; 314 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 315 k += diff2; 316 j = l2; 317 /* search temp for right[j], can copy up to that of temp into arr immediately */ 318 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 319 diff1 = l1 - i; 320 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 321 k += diff1; 322 i = l1; 323 if (i >= llen || j > right) break; 324 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 325 ++MIN_GALLOP_GLOBAL; 326 } 327 } 328 } 329 if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize); 330 PetscFunctionReturn(PETSC_SUCCESS); 331 } 332 333 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 334 complete array, left is first index of left array, mid is first index of right array, right is last index of right 335 array */ 336 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 337 { 338 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 339 const PetscInt rlen = right - mid + 1; 340 341 PetscFunctionBegin; 342 Petsc_memcpy(tarr, (arr) + mid * size, rlen * size); 343 while ((i >= 0) && (j >= left)) { 344 if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) { 345 Petsc_memcpy((arr) + k * size, (tarr) + i * size, size); 346 --k; 347 gallopleft = 0; 348 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 349 PetscInt l1, l2, diff1, diff2; 350 do { 351 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 352 /* search temp for left[j], can copy up to that many of temp into arr */ 353 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 354 diff1 = i - l1; 355 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 356 k -= diff1; 357 i = l1; 358 /* search left for temp[i], can move up to that many of left up arr */ 359 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 360 diff2 = j - l2; 361 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 362 k -= diff2; 363 j = l2; 364 if (i < 0 || j < left) break; 365 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 366 ++MIN_GALLOP_GLOBAL; 367 } 368 } else { 369 Petsc_memmove((arr) + k * size, (arr) + j * size, size); 370 --k; 371 gallopright = 0; 372 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 373 PetscInt l1, l2, diff1, diff2; 374 do { 375 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 376 /* search left for temp[i], can move up to that many of left up arr */ 377 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 378 diff2 = j - l2; 379 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 380 k -= diff2; 381 j = l2; 382 /* search temp for left[j], can copy up to that many of temp into arr */ 383 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 384 diff1 = i - l1; 385 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 386 k -= diff1; 387 i = l1; 388 if (i < 0 || j < left) break; 389 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 390 ++MIN_GALLOP_GLOBAL; 391 } 392 } 393 } 394 if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 399 { 400 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 401 const PetscInt rlen = right - mid + 1; 402 403 PetscFunctionBegin; 404 Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize); 405 while ((i >= 0) && (j >= left)) { 406 if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) { 407 Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize); 408 --k; 409 gallopleft = 0; 410 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 411 PetscInt l1, l2, diff1, diff2; 412 do { 413 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 414 /* search temp for left[j], can copy up to that many of temp into arr */ 415 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 416 diff1 = i - l1; 417 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 418 k -= diff1; 419 i = l1; 420 /* search left for temp[i], can move up to that many of left up arr */ 421 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 422 diff2 = j - l2; 423 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 424 k -= diff2; 425 j = l2; 426 if (i < 0 || j < left) break; 427 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 428 ++MIN_GALLOP_GLOBAL; 429 } 430 } else { 431 Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize); 432 --k; 433 gallopright = 0; 434 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 435 PetscInt l1, l2, diff1, diff2; 436 do { 437 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 438 /* search left for temp[i], can move up to that many of left up arr */ 439 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 440 diff2 = j - l2; 441 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 442 k -= diff2; 443 j = l2; 444 /* search temp for left[j], can copy up to that many of temp into arr */ 445 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 446 diff1 = i - l1; 447 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 448 k -= diff1; 449 i = l1; 450 if (i < 0 || j < left) break; 451 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 452 ++MIN_GALLOP_GLOBAL; 453 } 454 } 455 } 456 if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 461 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 462 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 463 { 464 PetscInt i = start == left ? start + 1 : start; 465 466 PetscFunctionBegin; 467 for (; i <= right; ++i) { 468 PetscInt j = i - 1; 469 Petsc_memcpy(tarr, arr + (i * size), size); 470 while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) { 471 COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size); 472 --j; 473 } 474 Petsc_memcpy((arr) + (j + 1) * size, tarr, size); 475 } 476 PetscFunctionReturn(PETSC_SUCCESS); 477 } 478 479 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 480 { 481 PetscInt i = start == left ? start + 1 : start; 482 483 PetscFunctionBegin; 484 for (; i <= right; ++i) { 485 PetscInt j = i - 1; 486 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 487 while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) { 488 COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize); 489 --j; 490 } 491 Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize); 492 } 493 PetscFunctionReturn(PETSC_SUCCESS); 494 } 495 496 /* See PetscInsertionSort_Private */ 497 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 498 { 499 PetscInt i = start == left ? start + 1 : start; 500 501 PetscFunctionBegin; 502 for (; i <= right; ++i) { 503 PetscInt l = left, r = i; 504 Petsc_memcpy(tarr, arr + (i * size), size); 505 do { 506 const PetscInt m = l + ((r - l) >> 1); 507 if ((*cmp)(tarr, arr + (m * size), ctx) < 0) { 508 r = m; 509 } else { 510 l = m + 1; 511 } 512 } while (l < r); 513 Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size); 514 Petsc_memcpy(arr + (l * size), tarr, size); 515 } 516 PetscFunctionReturn(PETSC_SUCCESS); 517 } 518 519 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 520 { 521 PetscInt i = start == left ? start + 1 : start; 522 523 PetscFunctionBegin; 524 for (; i <= right; ++i) { 525 PetscInt l = left, r = i; 526 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 527 do { 528 const PetscInt m = l + ((r - l) >> 1); 529 if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) { 530 r = m; 531 } else { 532 l = m + 1; 533 } 534 } while (l < r); 535 Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize); 536 Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize); 537 } 538 PetscFunctionReturn(PETSC_SUCCESS); 539 } 540 541 typedef struct { 542 PetscInt size; 543 PetscInt start; 544 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 545 } PetscTimSortStack; 546 #else 547 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt)); 548 #endif 549 550 typedef struct { 551 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 552 size_t size; 553 size_t maxsize; 554 } PetscTimSortBuffer; 555 556 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) 557 { 558 PetscFunctionBegin; 559 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS); 560 { 561 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 562 size_t newMax = PetscMin(newSize * newSize, buff->maxsize); 563 PetscCall(PetscFree(buff->ptr)); 564 PetscCall(PetscMalloc1(newMax, &buff->ptr)); 565 buff->size = newMax; 566 } 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) 571 { 572 PetscFunctionBegin; 573 for (; stacksize; --stacksize) { 574 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 575 if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) { 576 PetscInt l, m = stack[stacksize].start, r; 577 /* Search A for B[0] insertion */ 578 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l)); 579 /* Search B for A[-1] insertion */ 580 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r)); 581 if (m - l <= r - m) { 582 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 583 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 584 } else { 585 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 586 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 587 } 588 } 589 /* Update A with merge */ 590 stack[stacksize - 1].size += stack[stacksize].size; 591 } 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) 596 { 597 PetscFunctionBegin; 598 for (; stacksize; --stacksize) { 599 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 600 if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) { 601 PetscInt l, m = stack[stacksize].start, r; 602 /* Search A for B[0] insertion */ 603 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l)); 604 /* Search B for A[-1] insertion */ 605 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r)); 606 if (m - l <= r - m) { 607 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 608 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 609 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 610 } else { 611 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 612 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 613 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 614 } 615 } 616 /* Update A with merge */ 617 stack[stacksize - 1].size += stack[stacksize].size; 618 } 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 622 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) 623 { 624 PetscInt i = *stacksize; 625 626 PetscFunctionBegin; 627 while (i) { 628 PetscInt l, m, r, itemp = i; 629 630 if (i == 1) { 631 /* A = stack[i-1], B = stack[i] */ 632 if (stack[i - 1].size < stack[i].size) { 633 /* if A[-1] <= B[0] then sorted */ 634 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 635 m = stack[i].start; 636 /* Search A for B[0] insertion */ 637 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l)); 638 /* Search B for A[-1] insertion */ 639 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r)); 640 if (m - l <= r - m) { 641 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 642 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 643 } else { 644 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 645 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 646 } 647 } 648 /* Update A with merge */ 649 stack[i - 1].size += stack[i].size; 650 --i; 651 } 652 } else { 653 /* i > 2, i.e. C exists 654 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 655 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 656 if (stack[i - 2].size < stack[i].size) { 657 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 658 if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) { 659 m = stack[i - 1].start; 660 /* Search A for B[0] insertion */ 661 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l)); 662 /* Search B for A[-1] insertion */ 663 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r)); 664 if (m - l <= r - m) { 665 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 666 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 667 } else { 668 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 669 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 670 } 671 } 672 /* Update A with merge */ 673 stack[i - 2].size += stack[i - 1].size; 674 /* Push C up the stack */ 675 stack[i - 1].start = stack[i].start; 676 stack[i - 1].size = stack[i].size; 677 } else { 678 /* merge C into B */ 679 mergeBC: 680 /* If B[-1] <= C[0] then... you know the drill */ 681 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 682 m = stack[i].start; 683 /* Search B for C[0] insertion */ 684 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l)); 685 /* Search C for B[-1] insertion */ 686 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r)); 687 if (m - l <= r - m) { 688 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 689 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 690 } else { 691 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 692 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 693 } 694 } 695 /* Update B with merge */ 696 stack[i - 1].size += stack[i].size; 697 } 698 --i; 699 } else if (stack[i - 1].size <= stack[i].size) { 700 /* merge C into B */ 701 goto mergeBC; 702 } 703 } 704 if (itemp == i) break; 705 } 706 *stacksize = i; 707 PetscFunctionReturn(PETSC_SUCCESS); 708 } 709 710 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) 711 { 712 PetscInt i = *stacksize; 713 714 PetscFunctionBegin; 715 while (i) { 716 PetscInt l, m, r, itemp = i; 717 718 if (i == 1) { 719 /* A = stack[i-1], B = stack[i] */ 720 if (stack[i - 1].size < stack[i].size) { 721 /* if A[-1] <= B[0] then sorted */ 722 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 723 m = stack[i].start; 724 /* Search A for B[0] insertion */ 725 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l)); 726 /* Search B for A[-1] insertion */ 727 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r)); 728 if (m - l <= r - m) { 729 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 730 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 731 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 732 } else { 733 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 734 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 735 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 736 } 737 } 738 /* Update A with merge */ 739 stack[i - 1].size += stack[i].size; 740 --i; 741 } 742 } else { 743 /* i > 2, i.e. C exists 744 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 745 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 746 if (stack[i - 2].size < stack[i].size) { 747 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 748 if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) { 749 m = stack[i - 1].start; 750 /* Search A for B[0] insertion */ 751 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l)); 752 /* Search B for A[-1] insertion */ 753 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r)); 754 if (m - l <= r - m) { 755 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 756 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 757 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 758 } else { 759 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 760 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 761 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 762 } 763 } 764 /* Update A with merge */ 765 stack[i - 2].size += stack[i - 1].size; 766 /* Push C up the stack */ 767 stack[i - 1].start = stack[i].start; 768 stack[i - 1].size = stack[i].size; 769 } else { 770 /* merge C into B */ 771 mergeBC: 772 /* If B[-1] <= C[0] then... you know the drill */ 773 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 774 m = stack[i].start; 775 /* Search B for C[0] insertion */ 776 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l)); 777 /* Search C for B[-1] insertion */ 778 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r)); 779 if (m - l <= r - m) { 780 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 781 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 782 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 783 } else { 784 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 785 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 786 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 787 } 788 } 789 /* Update B with merge */ 790 stack[i - 1].size += stack[i].size; 791 } 792 --i; 793 } else if (stack[i - 1].size <= stack[i].size) { 794 /* merge C into B */ 795 goto mergeBC; 796 } 797 } 798 if (itemp == i) break; 799 } 800 *stacksize = i; 801 PetscFunctionReturn(PETSC_SUCCESS); 802 } 803 804 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 805 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 806 binary insertion sort or regulat insertion sort */ 807 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 808 { 809 const PetscInt re = PetscMin(runstart + minrun, n - 1); 810 PetscInt ri = runstart; 811 812 PetscFunctionBegin; 813 if (PetscUnlikely(runstart == n - 1)) { 814 *runend = runstart; 815 PetscFunctionReturn(PETSC_SUCCESS); 816 } 817 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 818 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) { 819 ++ri; 820 while (ri < n - 1) { 821 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break; 822 ++ri; 823 } 824 { 825 PetscInt lo = runstart, hi = ri; 826 for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size); 827 } 828 } else { 829 ++ri; 830 while (ri < n - 1) { 831 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break; 832 ++ri; 833 } 834 } 835 if (ri < re) { 836 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 837 binary search */ 838 if (ri - runstart <= minrun >> 1) { 839 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 840 PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 841 } else { 842 PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 843 } 844 *runend = re; 845 } else *runend = ri; 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 850 { 851 const PetscInt re = PetscMin(runstart + minrun, n - 1); 852 PetscInt ri = runstart; 853 854 PetscFunctionBegin; 855 if (PetscUnlikely(runstart == n - 1)) { 856 *runend = runstart; 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 860 if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) { 861 ++ri; 862 while (ri < n - 1) { 863 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break; 864 ++ri; 865 } 866 { 867 PetscInt lo = runstart, hi = ri; 868 for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr); 869 } 870 } else { 871 ++ri; 872 while (ri < n - 1) { 873 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break; 874 ++ri; 875 } 876 } 877 if (ri < re) { 878 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 879 binary search */ 880 if (ri - runstart <= minrun >> 1) { 881 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 882 PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 883 } else { 884 PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 885 } 886 *runend = re; 887 } else *runend = ri; 888 PetscFunctionReturn(PETSC_SUCCESS); 889 } 890 891 /*@C 892 PetscTimSort - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm. 893 894 Not Collective 895 896 Input Parameters: 897 + n - number of values 898 . arr - array to be sorted 899 . size - size in bytes of the datatype held in arr 900 . cmp - function pointer to comparison function 901 - ctx - optional context to be passed to comparison function, NULL if not needed 902 903 Output Parameter: 904 . arr - sorted array 905 906 Level: developer 907 908 Notes: 909 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 910 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 911 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 912 do so it repeatedly triggers attempts throughout to merge adjacent runs. 913 914 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 915 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 916 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 917 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 918 likely all contain similar data. 919 920 Example Usage: 921 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 922 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 923 may also 924 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 925 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 926 order 927 928 .vb 929 int increasing_comparison_function(const void *left, const void *right, void *ctx) { 930 my_type l = *(my_type *) left, r = *(my_type *) right; 931 return (l < r) ? -1 : (l > r); 932 } 933 .ve 934 935 Note the context is unused here but you may use it to pass and subsequently access whatever information required 936 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 937 Then pass the function 938 .vb 939 PetscTimSort(n, arr, sizeof(arr[0]), increasing_comparison_function, ctx) 940 .ve 941 942 Fortran Notes: 943 To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 944 returns result. For example 945 .vb 946 subroutine CompareIntegers(left,right,ctx,result) 947 implicit none 948 949 PetscInt,intent(in) :: left, right 950 type(UserCtx) :: ctx 951 integer,intent(out) :: result 952 953 if (left < right) then 954 result = -1 955 else if (left == right) then 956 result = 0 957 else 958 result = 1 959 end if 960 return 961 end subroutine CompareIntegers 962 .ve 963 964 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()` 965 @*/ 966 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) 967 { 968 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 969 PetscTimSortStack runstack[128]; 970 PetscTimSortBuffer buff; 971 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 972 It is so unlikely that this limit is reached that this is __never__ checked for */ 973 974 PetscFunctionBegin; 975 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 976 is a power of 2 or one plus a power of 2 */ 977 { 978 PetscInt t = n, r = 0; 979 /* r becomes 1 if the least significant bits contain at least one off bit */ 980 while (t >= 64) { 981 r |= t & 1; 982 t >>= 1; 983 } 984 minrun = t + r; 985 } 986 if (PetscDefined(USE_DEBUG)) { 987 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 988 if (n < 64) { 989 PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n)); 990 } else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 991 } 992 PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr)); 993 buff.size = (size_t)minrun * size; 994 buff.maxsize = (size_t)n * size; 995 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 996 while (runstart < n) { 997 /* Check if additional entries are at least partially ordered and build natural run */ 998 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend)); 999 runstack[stacksize].start = runstart; 1000 runstack[stacksize].size = runend - runstart + 1; 1001 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize)); 1002 ++stacksize; 1003 runstart = runend + 1; 1004 } 1005 /* Have been inside while, so discard last stacksize++ */ 1006 --stacksize; 1007 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize)); 1008 PetscCall(PetscFree(buff.ptr)); 1009 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1010 PetscFunctionReturn(PETSC_SUCCESS); 1011 } 1012 1013 /*@C 1014 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm and 1015 reorders a second array to match the first. The arrays need not be the same type. 1016 1017 Not Collective 1018 1019 Input Parameters: 1020 + n - number of values 1021 . asize - size in bytes of the datatype held in arr 1022 . bsize - size in bytes of the datatype held in barr 1023 . cmp - function pointer to comparison function 1024 - ctx - optional context to be passed to comparison function, NULL if not needed 1025 1026 Input/Output Parameters: 1027 + arr - array to be sorted, on output it is sorted 1028 - barr - array to be reordered, on output it is reordered 1029 1030 Level: developer 1031 1032 Notes: 1033 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1034 overlap. 1035 1036 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1037 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1038 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1039 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1040 1041 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1042 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1043 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1044 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1045 likely all contain similar data. 1046 1047 Example Usage: 1048 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 1049 between its arguments. If left < right \: return -1, if left == right \: return 0, if left > right \: return 1. The user 1050 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1051 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1052 increasing order 1053 1054 .vb 1055 int increasing_comparison_function(const void *left, const void *right, void *ctx) { 1056 my_type l = *(my_type *) left, r = *(my_type *) right; 1057 return (l < r) ? -1 : (l > r); 1058 } 1059 .ve 1060 1061 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1062 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1063 Then pass the function 1064 .vb 1065 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), increasing_comparison_function, ctx) 1066 .ve 1067 1068 Fortran Notes: 1069 To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 1070 returns result. For example 1071 .vb 1072 subroutine CompareIntegers(left,right,ctx,result) 1073 implicit none 1074 1075 PetscInt,intent(in) :: left, right 1076 type(UserCtx) :: ctx 1077 integer,intent(out) :: result 1078 1079 if (left < right) then 1080 result = -1 1081 else if (left == right) then 1082 result = 0 1083 else 1084 result = 1 1085 end if 1086 return 1087 end subroutine CompareIntegers 1088 .ve 1089 1090 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()` 1091 @*/ 1092 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) 1093 { 1094 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1095 PetscTimSortStack runstack[128]; 1096 PetscTimSortBuffer abuff, bbuff; 1097 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1098 It is so unlikely that this limit is reached that this is __never__ checked for */ 1099 1100 PetscFunctionBegin; 1101 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1102 is a power of 2 or one plus a power of 2 */ 1103 { 1104 PetscInt t = n, r = 0; 1105 /* r becomes 1 if the least significant bits contain at least one off bit */ 1106 while (t >= 64) { 1107 r |= t & 1; 1108 t >>= 1; 1109 } 1110 minrun = t + r; 1111 } 1112 if (PetscDefined(USE_DEBUG)) { 1113 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 1114 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 1115 } 1116 PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr)); 1117 abuff.size = (size_t)minrun * asize; 1118 abuff.maxsize = (size_t)n * asize; 1119 PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr)); 1120 bbuff.size = (size_t)minrun * bsize; 1121 bbuff.maxsize = (size_t)n * bsize; 1122 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1123 while (runstart < n) { 1124 /* Check if additional entries are at least partially ordered and build natural run */ 1125 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend)); 1126 runstack[stacksize].start = runstart; 1127 runstack[stacksize].size = runend - runstart + 1; 1128 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize)); 1129 ++stacksize; 1130 runstart = runend + 1; 1131 } 1132 /* Have been inside while, so discard last stacksize++ */ 1133 --stacksize; 1134 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize)); 1135 PetscCall(PetscFree(abuff.ptr)); 1136 PetscCall(PetscFree(bbuff.ptr)); 1137 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1138 PetscFunctionReturn(PETSC_SUCCESS); 1139 } 1140 1141 /*@ 1142 PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order. 1143 1144 Not Collective 1145 1146 Input Parameters: 1147 + n - number of values 1148 - arr - array of integers 1149 1150 Output Parameter: 1151 . arr - sorted array of integers 1152 1153 Level: intermediate 1154 1155 Notes: 1156 If the array is less than 64 entries long `PetscSortInt()` is automatically used. 1157 1158 This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is 1159 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1160 recommended that the user benchmark their code to see which routine is fastest. 1161 1162 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()` 1163 @*/ 1164 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) 1165 { 1166 PetscFunctionBegin; 1167 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1168 PetscAssertPointer(arr, 2); 1169 if (n < 64) { 1170 PetscCall(PetscSortInt(n, arr)); 1171 } else { 1172 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1173 } 1174 PetscFunctionReturn(PETSC_SUCCESS); 1175 } 1176 1177 /*@ 1178 PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second 1179 `PetscInt` array to match the first. 1180 1181 Not Collective 1182 1183 Input Parameter: 1184 . n - number of values 1185 1186 Input/Output Parameters: 1187 + arr1 - array of integers to be sorted, modified on output 1188 - arr2 - array of integers to be reordered, modified on output 1189 1190 Level: intermediate 1191 1192 Notes: 1193 The arrays CANNOT overlap. 1194 1195 This function serves as an alternative to `PetscSortIntWithArray()`. While this function works for any array of integers it is 1196 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1197 recommended that the user benchmark their code to see which routine is fastest. 1198 1199 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()` 1200 @*/ 1201 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) 1202 { 1203 PetscFunctionBegin; 1204 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1205 PetscAssertPointer(arr1, 2); 1206 PetscAssertPointer(arr2, 3); 1207 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1208 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1209 PetscFunctionReturn(PETSC_SUCCESS); 1210 } 1211 1212 /*@ 1213 PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order. 1214 1215 Not Collective 1216 1217 Input Parameters: 1218 + n - number of values 1219 - arr - array of `PetscMPIInt` 1220 1221 Output Parameter: 1222 . arr - sorted array of integers 1223 1224 Level: intermediate 1225 1226 Notes: 1227 If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used. 1228 1229 This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is 1230 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1231 recommended that the user benchmark their code to see which routine is fastest. 1232 1233 .seealso: `PetscTimSort()`, `PetscSortMPIInt()` 1234 @*/ 1235 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) 1236 { 1237 PetscFunctionBegin; 1238 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1239 PetscAssertPointer(arr, 2); 1240 if (n < 64) { 1241 PetscCall(PetscSortMPIInt(n, arr)); 1242 } else { 1243 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1244 } 1245 PetscFunctionReturn(PETSC_SUCCESS); 1246 } 1247 1248 /*@ 1249 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt` 1250 array to match the first. 1251 1252 Not Collective 1253 1254 Input Parameter: 1255 . n - number of values 1256 1257 Input/Output Parameters: 1258 + arr1 - array of integers to be sorted, modified on output 1259 - arr2 - array of integers to be reordered, modified on output 1260 1261 Level: intermediate 1262 1263 Notes: 1264 The arrays CANNOT overlap. 1265 1266 This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is 1267 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1268 recommended that the user benchmark their code to see which routine is fastest. 1269 1270 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()` 1271 @*/ 1272 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) 1273 { 1274 PetscFunctionBegin; 1275 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1276 PetscAssertPointer(arr1, 2); 1277 PetscAssertPointer(arr2, 3); 1278 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1279 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1280 PetscFunctionReturn(PETSC_SUCCESS); 1281 } 1282 1283 /*@ 1284 PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order. 1285 1286 Not Collective 1287 1288 Input Parameters: 1289 + n - number of values 1290 - arr - array of `PetscReal` 1291 1292 Output Parameter: 1293 . arr - sorted array of integers 1294 1295 Level: intermediate 1296 1297 Notes: 1298 If the array is less than 64 entries long `PetscSortReal()` is automatically used. 1299 1300 This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is 1301 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1302 recommended that the user benchmark their code to see which routine is fastest. 1303 1304 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()` 1305 @*/ 1306 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) 1307 { 1308 PetscFunctionBegin; 1309 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1310 PetscAssertPointer(arr, 2); 1311 if (n < 64) { 1312 PetscCall(PetscSortReal(n, arr)); 1313 } else { 1314 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL)); 1315 } 1316 PetscFunctionReturn(PETSC_SUCCESS); 1317 } 1318 1319 /*@ 1320 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second 1321 array of `PetscInt` to match the first. 1322 1323 Not Collective 1324 1325 Input Parameter: 1326 . n - number of values 1327 1328 Input/Output Parameters: 1329 + arr1 - array of `PetscReal` to be sorted, modified on output 1330 - arr2 - array of `PetscInt` to be reordered, modified on output 1331 1332 Level: intermediate 1333 1334 Notes: 1335 This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is 1336 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1337 recommended that the user benchmark their code to see which routine is fastest. 1338 1339 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()` 1340 @*/ 1341 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) 1342 { 1343 PetscFunctionBegin; 1344 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1345 PetscAssertPointer(arr1, 2); 1346 PetscAssertPointer(arr2, 3); 1347 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1348 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL)); 1349 PetscFunctionReturn(PETSC_SUCCESS); 1350 } 1351