1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5 { 6 PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right; 7 return (l < r) ? -1 : (l > r); 8 } 9 10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11 { 12 PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right; 13 return (l < r) ? -1 : (l > r); 14 } 15 16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17 { 18 PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right; 19 return (l < r) ? -1 : (l > r); 20 } 21 22 #define MIN_GALLOP_CONST_GLOBAL 8 23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 24 typedef int (*CompFunc)(const void *, const void *, void *); 25 26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 27 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__) 28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29 { 30 __builtin_memcpy(t, b, size); 31 __builtin_memmove(b, a, size); 32 __builtin_memcpy(a, t, size); 33 return; 34 } 35 36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 37 { 38 __builtin_memcpy(t, ar, asize); 39 __builtin_memmove(ar, al, asize); 40 __builtin_memcpy(al, t, asize); 41 __builtin_memcpy(t, br, bsize); 42 __builtin_memmove(br, bl, bsize); 43 __builtin_memcpy(bl, t, bsize); 44 return; 45 } 46 47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 48 { 49 __builtin_memcpy(dest, src, size); 50 return; 51 } 52 53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54 { 55 __builtin_memcpy(adest, asrc, asize); 56 __builtin_memcpy(bdest, bsrc, bsize); 57 return; 58 } 59 60 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 61 { 62 __builtin_memmove(dest, src, size); 63 return; 64 } 65 66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67 { 68 __builtin_memmove(adest, asrc, asize); 69 __builtin_memmove(bdest, bsrc, bsize); 70 return; 71 } 72 #else 73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74 { 75 PetscFunctionBegin; 76 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size)); 77 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size)); 78 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size)); 79 PetscFunctionReturnVoid(); 80 } 81 82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 83 { 84 PetscFunctionBegin; 85 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize)); 86 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize)); 87 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize)); 88 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize)); 89 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize)); 90 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize)); 91 PetscFunctionReturnVoid(); 92 } 93 94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 95 { 96 PetscFunctionBegin; 97 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size)); 98 PetscFunctionReturnVoid(); 99 } 100 101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 102 { 103 PetscFunctionBegin; 104 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize)); 105 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize)); 106 PetscFunctionReturnVoid(); 107 } 108 109 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 110 { 111 PetscFunctionBegin; 112 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size)); 113 PetscFunctionReturnVoid(); 114 } 115 116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 117 { 118 PetscFunctionBegin; 119 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize)); 120 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize)); 121 PetscFunctionReturnVoid(); 122 } 123 #endif 124 125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 126 x. Output also inclusive. 127 128 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 129 130 {0,1,2,3,4,5} 131 132 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 133 */ 134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 135 { 136 PetscInt last = l, k = 1, mid, cur = l + 1; 137 138 PetscFunctionBegin; 139 *m = l; 140 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l); 141 if ((*cmp)(x, arr + r * size, ctx) >= 0) { 142 *m = r; 143 PetscFunctionReturn(PETSC_SUCCESS); 144 } 145 if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 146 while (PETSC_TRUE) { 147 if (PetscUnlikely(cur > r)) { 148 cur = r; 149 break; 150 } 151 if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break; 152 last = cur; 153 cur += (k <<= 1) + 1; 154 ++k; 155 } 156 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 157 while (cur > last + 1) { 158 mid = last + ((cur - last) >> 1); 159 if ((*cmp)(x, (arr) + mid * size, ctx) < 0) { 160 cur = mid; 161 } else { 162 last = mid; 163 } 164 } 165 *m = cur; 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 170 < x. Output also inclusive */ 171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 172 { 173 PetscInt last = r, k = 1, mid, cur = r - 1; 174 175 PetscFunctionBegin; 176 *m = r; 177 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l); 178 if ((*cmp)(x, arr + l * size, ctx) <= 0) { 179 *m = l; 180 PetscFunctionReturn(PETSC_SUCCESS); 181 } 182 if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS); 183 while (PETSC_TRUE) { 184 if (PetscUnlikely(cur < l)) { 185 cur = l; 186 break; 187 } 188 if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break; 189 last = cur; 190 cur -= (k <<= 1) + 1; 191 ++k; 192 } 193 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 194 while (last > cur + 1) { 195 mid = last - ((last - cur) >> 1); 196 if ((*cmp)(x, (arr) + mid * size, ctx) > 0) { 197 cur = mid; 198 } else { 199 last = mid; 200 } 201 } 202 *m = cur; 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 207 complete array, left is first index of left array, mid is first index of right array, right is last index of right 208 array */ 209 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 210 { 211 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 212 const PetscInt llen = mid - left; 213 214 PetscFunctionBegin; 215 Petsc_memcpy(tarr, arr + (left * size), llen * size); 216 while ((i < llen) && (j <= right)) { 217 if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) { 218 Petsc_memcpy(arr + (k * size), tarr + (i * size), size); 219 ++k; 220 gallopright = 0; 221 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 222 PetscInt l1, l2, diff1, diff2; 223 do { 224 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 225 /* search temp for right[j], can move up to that of temp into arr immediately */ 226 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 227 diff1 = l1 - i; 228 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 229 k += diff1; 230 i = l1; 231 /* search right for temp[i], can move up to that many of right into arr */ 232 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 233 diff2 = l2 - j; 234 Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size); 235 k += diff2; 236 j = l2; 237 if (i >= llen || j > right) break; 238 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 239 ++MIN_GALLOP_GLOBAL; 240 } 241 } else { 242 Petsc_memmove(arr + (k * size), arr + (j * size), size); 243 ++k; 244 gallopleft = 0; 245 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 246 PetscInt l1, l2, diff1, diff2; 247 do { 248 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 249 /* search right for temp[i], can move up to that many of right into arr */ 250 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 251 diff2 = l2 - j; 252 Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size); 253 k += diff2; 254 j = l2; 255 /* search temp for right[j], can copy up to that of temp into arr immediately */ 256 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 257 diff1 = l1 - i; 258 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 259 k += diff1; 260 i = l1; 261 if (i >= llen || j > right) break; 262 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 263 ++MIN_GALLOP_GLOBAL; 264 } 265 } 266 } 267 if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 272 { 273 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 274 const PetscInt llen = mid - left; 275 276 PetscFunctionBegin; 277 Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize); 278 while ((i < llen) && (j <= right)) { 279 if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) { 280 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize); 281 ++k; 282 gallopright = 0; 283 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 284 PetscInt l1, l2, diff1, diff2; 285 do { 286 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 287 /* search temp for right[j], can move up to that of temp into arr immediately */ 288 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 289 diff1 = l1 - i; 290 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 291 k += diff1; 292 i = l1; 293 /* search right for temp[i], can move up to that many of right into arr */ 294 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 295 diff2 = l2 - j; 296 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 297 k += diff2; 298 j = l2; 299 if (i >= llen || j > right) break; 300 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 301 ++MIN_GALLOP_GLOBAL; 302 } 303 } else { 304 Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize); 305 ++k; 306 gallopleft = 0; 307 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 308 PetscInt l1, l2, diff1, diff2; 309 do { 310 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 311 /* search right for temp[i], can move up to that many of right into arr */ 312 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 313 diff2 = l2 - j; 314 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 315 k += diff2; 316 j = l2; 317 /* search temp for right[j], can copy up to that of temp into arr immediately */ 318 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 319 diff1 = l1 - i; 320 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 321 k += diff1; 322 i = l1; 323 if (i >= llen || j > right) break; 324 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 325 ++MIN_GALLOP_GLOBAL; 326 } 327 } 328 } 329 if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize); 330 PetscFunctionReturn(PETSC_SUCCESS); 331 } 332 333 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 334 complete array, left is first index of left array, mid is first index of right array, right is last index of right 335 array */ 336 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 337 { 338 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 339 const PetscInt rlen = right - mid + 1; 340 341 PetscFunctionBegin; 342 Petsc_memcpy(tarr, (arr) + mid * size, rlen * size); 343 while ((i >= 0) && (j >= left)) { 344 if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) { 345 Petsc_memcpy((arr) + k * size, (tarr) + i * size, size); 346 --k; 347 gallopleft = 0; 348 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 349 PetscInt l1, l2, diff1, diff2; 350 do { 351 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 352 /* search temp for left[j], can copy up to that many of temp into arr */ 353 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 354 diff1 = i - l1; 355 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 356 k -= diff1; 357 i = l1; 358 /* search left for temp[i], can move up to that many of left up arr */ 359 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 360 diff2 = j - l2; 361 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 362 k -= diff2; 363 j = l2; 364 if (i < 0 || j < left) break; 365 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 366 ++MIN_GALLOP_GLOBAL; 367 } 368 } else { 369 Petsc_memmove((arr) + k * size, (arr) + j * size, size); 370 --k; 371 gallopright = 0; 372 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 373 PetscInt l1, l2, diff1, diff2; 374 do { 375 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 376 /* search left for temp[i], can move up to that many of left up arr */ 377 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 378 diff2 = j - l2; 379 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 380 k -= diff2; 381 j = l2; 382 /* search temp for left[j], can copy up to that many of temp into arr */ 383 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 384 diff1 = i - l1; 385 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 386 k -= diff1; 387 i = l1; 388 if (i < 0 || j < left) break; 389 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 390 ++MIN_GALLOP_GLOBAL; 391 } 392 } 393 } 394 if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 399 { 400 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 401 const PetscInt rlen = right - mid + 1; 402 403 PetscFunctionBegin; 404 Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize); 405 while ((i >= 0) && (j >= left)) { 406 if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) { 407 Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize); 408 --k; 409 gallopleft = 0; 410 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 411 PetscInt l1, l2, diff1, diff2; 412 do { 413 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 414 /* search temp for left[j], can copy up to that many of temp into arr */ 415 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 416 diff1 = i - l1; 417 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 418 k -= diff1; 419 i = l1; 420 /* search left for temp[i], can move up to that many of left up arr */ 421 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 422 diff2 = j - l2; 423 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 424 k -= diff2; 425 j = l2; 426 if (i < 0 || j < left) break; 427 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 428 ++MIN_GALLOP_GLOBAL; 429 } 430 } else { 431 Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize); 432 --k; 433 gallopright = 0; 434 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 435 PetscInt l1, l2, diff1, diff2; 436 do { 437 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 438 /* search left for temp[i], can move up to that many of left up arr */ 439 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 440 diff2 = j - l2; 441 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 442 k -= diff2; 443 j = l2; 444 /* search temp for left[j], can copy up to that many of temp into arr */ 445 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 446 diff1 = i - l1; 447 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 448 k -= diff1; 449 i = l1; 450 if (i < 0 || j < left) break; 451 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 452 ++MIN_GALLOP_GLOBAL; 453 } 454 } 455 } 456 if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 461 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 462 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 463 { 464 PetscInt i = start == left ? start + 1 : start; 465 466 PetscFunctionBegin; 467 for (; i <= right; ++i) { 468 PetscInt j = i - 1; 469 Petsc_memcpy(tarr, arr + (i * size), size); 470 while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) { 471 COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size); 472 --j; 473 } 474 Petsc_memcpy((arr) + (j + 1) * size, tarr, size); 475 } 476 PetscFunctionReturn(PETSC_SUCCESS); 477 } 478 479 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 480 { 481 PetscInt i = start == left ? start + 1 : start; 482 483 PetscFunctionBegin; 484 for (; i <= right; ++i) { 485 PetscInt j = i - 1; 486 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 487 while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) { 488 COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize); 489 --j; 490 } 491 Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize); 492 } 493 PetscFunctionReturn(PETSC_SUCCESS); 494 } 495 496 /* See PetscInsertionSort_Private */ 497 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 498 { 499 PetscInt i = start == left ? start + 1 : start; 500 501 PetscFunctionBegin; 502 for (; i <= right; ++i) { 503 PetscInt l = left, r = i; 504 Petsc_memcpy(tarr, arr + (i * size), size); 505 do { 506 const PetscInt m = l + ((r - l) >> 1); 507 if ((*cmp)(tarr, arr + (m * size), ctx) < 0) { 508 r = m; 509 } else { 510 l = m + 1; 511 } 512 } while (l < r); 513 Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size); 514 Petsc_memcpy(arr + (l * size), tarr, size); 515 } 516 PetscFunctionReturn(PETSC_SUCCESS); 517 } 518 519 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 520 { 521 PetscInt i = start == left ? start + 1 : start; 522 523 PetscFunctionBegin; 524 for (; i <= right; ++i) { 525 PetscInt l = left, r = i; 526 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 527 do { 528 const PetscInt m = l + ((r - l) >> 1); 529 if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) { 530 r = m; 531 } else { 532 l = m + 1; 533 } 534 } while (l < r); 535 Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize); 536 Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize); 537 } 538 PetscFunctionReturn(PETSC_SUCCESS); 539 } 540 541 typedef struct { 542 PetscInt size; 543 PetscInt start; 544 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 545 } PetscTimSortStack; 546 #else 547 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt)); 548 #endif 549 550 typedef struct { 551 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 552 size_t size; 553 size_t maxsize; 554 } PetscTimSortBuffer; 555 556 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) 557 { 558 PetscFunctionBegin; 559 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS); 560 { 561 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 562 size_t newMax = PetscMin(newSize * newSize, buff->maxsize); 563 PetscCall(PetscFree(buff->ptr)); 564 PetscCall(PetscMalloc1(newMax, &buff->ptr)); 565 buff->size = newMax; 566 } 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) 571 { 572 PetscFunctionBegin; 573 for (; stacksize; --stacksize) { 574 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 575 if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) { 576 PetscInt l, m = stack[stacksize].start, r; 577 /* Search A for B[0] insertion */ 578 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l)); 579 /* Search B for A[-1] insertion */ 580 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r)); 581 if (m - l <= r - m) { 582 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 583 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 584 } else { 585 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 586 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 587 } 588 } 589 /* Update A with merge */ 590 stack[stacksize - 1].size += stack[stacksize].size; 591 } 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) 596 { 597 PetscFunctionBegin; 598 for (; stacksize; --stacksize) { 599 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 600 if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) { 601 PetscInt l, m = stack[stacksize].start, r; 602 /* Search A for B[0] insertion */ 603 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l)); 604 /* Search B for A[-1] insertion */ 605 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r)); 606 if (m - l <= r - m) { 607 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 608 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 609 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 610 } else { 611 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 612 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 613 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 614 } 615 } 616 /* Update A with merge */ 617 stack[stacksize - 1].size += stack[stacksize].size; 618 } 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 622 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) 623 { 624 PetscInt i = *stacksize; 625 626 PetscFunctionBegin; 627 while (i) { 628 PetscInt l, m, r, itemp = i; 629 630 if (i == 1) { 631 /* A = stack[i-1], B = stack[i] */ 632 if (stack[i - 1].size < stack[i].size) { 633 /* if A[-1] <= B[0] then sorted */ 634 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 635 m = stack[i].start; 636 /* Search A for B[0] insertion */ 637 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l)); 638 /* Search B for A[-1] insertion */ 639 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r)); 640 if (m - l <= r - m) { 641 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 642 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 643 } else { 644 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 645 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 646 } 647 } 648 /* Update A with merge */ 649 stack[i - 1].size += stack[i].size; 650 --i; 651 } 652 } else { 653 /* i > 2, i.e. C exists 654 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 655 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 656 if (stack[i - 2].size < stack[i].size) { 657 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 658 if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) { 659 m = stack[i - 1].start; 660 /* Search A for B[0] insertion */ 661 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l)); 662 /* Search B for A[-1] insertion */ 663 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r)); 664 if (m - l <= r - m) { 665 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 666 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 667 } else { 668 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 669 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 670 } 671 } 672 /* Update A with merge */ 673 stack[i - 2].size += stack[i - 1].size; 674 /* Push C up the stack */ 675 stack[i - 1].start = stack[i].start; 676 stack[i - 1].size = stack[i].size; 677 } else { 678 /* merge C into B */ 679 mergeBC: 680 /* If B[-1] <= C[0] then... you know the drill */ 681 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 682 m = stack[i].start; 683 /* Search B for C[0] insertion */ 684 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l)); 685 /* Search C for B[-1] insertion */ 686 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r)); 687 if (m - l <= r - m) { 688 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 689 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 690 } else { 691 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 692 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 693 } 694 } 695 /* Update B with merge */ 696 stack[i - 1].size += stack[i].size; 697 } 698 --i; 699 } else if (stack[i - 1].size <= stack[i].size) { 700 /* merge C into B */ 701 goto mergeBC; 702 } 703 } 704 if (itemp == i) break; 705 } 706 *stacksize = i; 707 PetscFunctionReturn(PETSC_SUCCESS); 708 } 709 710 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) 711 { 712 PetscInt i = *stacksize; 713 714 PetscFunctionBegin; 715 while (i) { 716 PetscInt l, m, r, itemp = i; 717 718 if (i == 1) { 719 /* A = stack[i-1], B = stack[i] */ 720 if (stack[i - 1].size < stack[i].size) { 721 /* if A[-1] <= B[0] then sorted */ 722 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 723 m = stack[i].start; 724 /* Search A for B[0] insertion */ 725 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l)); 726 /* Search B for A[-1] insertion */ 727 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r)); 728 if (m - l <= r - m) { 729 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 730 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 731 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 732 } else { 733 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 734 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 735 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 736 } 737 } 738 /* Update A with merge */ 739 stack[i - 1].size += stack[i].size; 740 --i; 741 } 742 } else { 743 /* i > 2, i.e. C exists 744 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 745 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 746 if (stack[i - 2].size < stack[i].size) { 747 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 748 if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) { 749 m = stack[i - 1].start; 750 /* Search A for B[0] insertion */ 751 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l)); 752 /* Search B for A[-1] insertion */ 753 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r)); 754 if (m - l <= r - m) { 755 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 756 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 757 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 758 } else { 759 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 760 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 761 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 762 } 763 } 764 /* Update A with merge */ 765 stack[i - 2].size += stack[i - 1].size; 766 /* Push C up the stack */ 767 stack[i - 1].start = stack[i].start; 768 stack[i - 1].size = stack[i].size; 769 } else { 770 /* merge C into B */ 771 mergeBC: 772 /* If B[-1] <= C[0] then... you know the drill */ 773 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 774 m = stack[i].start; 775 /* Search B for C[0] insertion */ 776 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l)); 777 /* Search C for B[-1] insertion */ 778 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r)); 779 if (m - l <= r - m) { 780 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 781 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 782 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 783 } else { 784 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 785 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 786 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 787 } 788 } 789 /* Update B with merge */ 790 stack[i - 1].size += stack[i].size; 791 } 792 --i; 793 } else if (stack[i - 1].size <= stack[i].size) { 794 /* merge C into B */ 795 goto mergeBC; 796 } 797 } 798 if (itemp == i) break; 799 } 800 *stacksize = i; 801 PetscFunctionReturn(PETSC_SUCCESS); 802 } 803 804 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 805 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 806 binary insertion sort or regulat insertion sort */ 807 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 808 { 809 const PetscInt re = PetscMin(runstart + minrun, n - 1); 810 PetscInt ri = runstart; 811 812 PetscFunctionBegin; 813 if (PetscUnlikely(runstart == n - 1)) { 814 *runend = runstart; 815 PetscFunctionReturn(PETSC_SUCCESS); 816 } 817 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 818 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) { 819 ++ri; 820 while (ri < n - 1) { 821 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break; 822 ++ri; 823 } 824 { 825 PetscInt lo = runstart, hi = ri; 826 for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size); 827 } 828 } else { 829 ++ri; 830 while (ri < n - 1) { 831 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break; 832 ++ri; 833 } 834 } 835 if (ri < re) { 836 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 837 binary search */ 838 if (ri - runstart <= minrun >> 1) { 839 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 840 PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 841 } else { 842 PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re)); 843 } 844 *runend = re; 845 } else *runend = ri; 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 850 { 851 const PetscInt re = PetscMin(runstart + minrun, n - 1); 852 PetscInt ri = runstart; 853 854 PetscFunctionBegin; 855 if (PetscUnlikely(runstart == n - 1)) { 856 *runend = runstart; 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 860 if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) { 861 ++ri; 862 while (ri < n - 1) { 863 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break; 864 ++ri; 865 } 866 { 867 PetscInt lo = runstart, hi = ri; 868 for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr); 869 } 870 } else { 871 ++ri; 872 while (ri < n - 1) { 873 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break; 874 ++ri; 875 } 876 } 877 if (ri < re) { 878 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 879 binary search */ 880 if (ri - runstart <= minrun >> 1) { 881 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 882 PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 883 } else { 884 PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re)); 885 } 886 *runend = re; 887 } else *runend = ri; 888 PetscFunctionReturn(PETSC_SUCCESS); 889 } 890 891 /*@C 892 PetscTimSort - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm. 893 894 Not Collective, No Fortran Support 895 896 Input Parameters: 897 + n - number of values 898 . arr - array to be sorted 899 . size - size in bytes of the datatype held in arr 900 . cmp - function pointer to comparison function 901 - ctx - optional context to be passed to comparison function, NULL if not needed 902 903 Output Parameter: 904 . arr - sorted array 905 906 Level: developer 907 908 Notes: 909 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 910 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 911 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 912 do so it repeatedly triggers attempts throughout to merge adjacent runs. 913 914 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 915 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 916 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 917 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 918 likely all contain similar data. 919 920 Example Usage: 921 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 922 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 923 may also 924 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 925 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 926 order 927 928 .vb 929 int increasing_comparison_function(const void *left, const void *right, void *ctx) { 930 my_type l = *(my_type *) left, r = *(my_type *) right; 931 return (l < r) ? -1 : (l > r); 932 } 933 .ve 934 935 Note the context is unused here but you may use it to pass and subsequently access whatever information required 936 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 937 Then pass the function 938 .vb 939 PetscTimSort(n, arr, sizeof(arr[0]), increasing_comparison_function, ctx) 940 .ve 941 942 Fortran Notes: 943 To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts `left`, `right`, `ctx` and 944 returns `result`. For example 945 .vb 946 subroutine CompareIntegers(left,right,ctx,result) 947 implicit none 948 949 PetscInt,intent(in) :: left, right 950 type(UserCtx) :: ctx 951 integer,intent(out) :: result 952 953 if (left < right) then 954 result = -1 955 else if (left == right) then 956 result = 0 957 else 958 result = 1 959 end if 960 return 961 end subroutine CompareIntegers 962 .ve 963 964 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()` 965 @*/ 966 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) 967 { 968 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 969 PetscTimSortStack runstack[128]; 970 PetscTimSortBuffer buff; 971 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 972 It is so unlikely that this limit is reached that this is __never__ checked for */ 973 974 PetscFunctionBegin; 975 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 976 is a power of 2 or one plus a power of 2 */ 977 { 978 PetscInt t = n, r = 0; 979 /* r becomes 1 if the least significant bits contain at least one off bit */ 980 while (t >= 64) { 981 r |= t & 1; 982 t >>= 1; 983 } 984 minrun = t + r; 985 } 986 if (PetscDefined(USE_DEBUG)) { 987 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 988 if (n < 64) PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n)); 989 else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 990 } 991 PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr)); 992 buff.size = (size_t)minrun * size; 993 buff.maxsize = (size_t)n * size; 994 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 995 while (runstart < n) { 996 /* Check if additional entries are at least partially ordered and build natural run */ 997 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend)); 998 runstack[stacksize].start = runstart; 999 runstack[stacksize].size = runend - runstart + 1; 1000 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize)); 1001 ++stacksize; 1002 runstart = runend + 1; 1003 } 1004 /* Have been inside while, so discard last stacksize++ */ 1005 --stacksize; 1006 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize)); 1007 PetscCall(PetscFree(buff.ptr)); 1008 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1009 PetscFunctionReturn(PETSC_SUCCESS); 1010 } 1011 1012 /*@C 1013 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm and 1014 reorders a second array to match the first. The arrays need not be the same type. 1015 1016 Not Collective, No Fortran Support 1017 1018 Input Parameters: 1019 + n - number of values 1020 . asize - size in bytes of the datatype held in arr 1021 . bsize - size in bytes of the datatype held in barr 1022 . cmp - function pointer to comparison function 1023 - ctx - optional context to be passed to comparison function, NULL if not needed 1024 1025 Input/Output Parameters: 1026 + arr - array to be sorted, on output it is sorted 1027 - barr - array to be reordered, on output it is reordered 1028 1029 Level: developer 1030 1031 Notes: 1032 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1033 overlap. 1034 1035 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1036 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1037 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1038 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1039 1040 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1041 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1042 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1043 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1044 likely all contain similar data. 1045 1046 Example Usage: 1047 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference 1048 between its arguments. If left < right \: return -1, if left == right \: return 0, if left > right \: return 1. The user 1049 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1050 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1051 increasing order 1052 1053 .vb 1054 int increasing_comparison_function(const void *left, const void *right, void *ctx) { 1055 my_type l = *(my_type *) left, r = *(my_type *) right; 1056 return (l < r) ? -1 : (l > r); 1057 } 1058 .ve 1059 1060 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1061 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1062 Then pass the function 1063 .vb 1064 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), increasing_comparison_function, ctx) 1065 .ve 1066 1067 Fortran Notes: 1068 To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts `left`, `right`, `ctx` and 1069 returns `result`. For example 1070 .vb 1071 subroutine CompareIntegers(left,right,ctx,result) 1072 implicit none 1073 1074 PetscInt,intent(in) :: left, right 1075 type(UserCtx) :: ctx 1076 integer,intent(out) :: result 1077 1078 if (left < right) then 1079 result = -1 1080 else if (left == right) then 1081 result = 0 1082 else 1083 result = 1 1084 end if 1085 return 1086 end subroutine CompareIntegers 1087 .ve 1088 1089 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()` 1090 @*/ 1091 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) 1092 { 1093 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1094 PetscTimSortStack runstack[128]; 1095 PetscTimSortBuffer abuff, bbuff; 1096 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1097 It is so unlikely that this limit is reached that this is __never__ checked for */ 1098 1099 PetscFunctionBegin; 1100 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1101 is a power of 2 or one plus a power of 2 */ 1102 { 1103 PetscInt t = n, r = 0; 1104 /* r becomes 1 if the least significant bits contain at least one off bit */ 1105 while (t >= 64) { 1106 r |= t & 1; 1107 t >>= 1; 1108 } 1109 minrun = t + r; 1110 } 1111 if (PetscDefined(USE_DEBUG)) { 1112 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 1113 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 1114 } 1115 PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr)); 1116 abuff.size = (size_t)minrun * asize; 1117 abuff.maxsize = (size_t)n * asize; 1118 PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr)); 1119 bbuff.size = (size_t)minrun * bsize; 1120 bbuff.maxsize = (size_t)n * bsize; 1121 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1122 while (runstart < n) { 1123 /* Check if additional entries are at least partially ordered and build natural run */ 1124 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend)); 1125 runstack[stacksize].start = runstart; 1126 runstack[stacksize].size = runend - runstart + 1; 1127 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize)); 1128 ++stacksize; 1129 runstart = runend + 1; 1130 } 1131 /* Have been inside while, so discard last stacksize++ */ 1132 --stacksize; 1133 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize)); 1134 PetscCall(PetscFree(abuff.ptr)); 1135 PetscCall(PetscFree(bbuff.ptr)); 1136 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /*@ 1141 PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order. 1142 1143 Not Collective 1144 1145 Input Parameters: 1146 + n - number of values 1147 - arr - array of integers 1148 1149 Output Parameter: 1150 . arr - sorted array of integers 1151 1152 Level: intermediate 1153 1154 Notes: 1155 If the array is less than 64 entries long `PetscSortInt()` is automatically used. 1156 1157 This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is 1158 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1159 recommended that the user benchmark their code to see which routine is fastest. 1160 1161 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()` 1162 @*/ 1163 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) 1164 { 1165 PetscFunctionBegin; 1166 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1167 PetscAssertPointer(arr, 2); 1168 if (n < 64) { 1169 PetscCall(PetscSortInt(n, arr)); 1170 } else { 1171 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1172 } 1173 PetscFunctionReturn(PETSC_SUCCESS); 1174 } 1175 1176 /*@ 1177 PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second 1178 `PetscInt` array to match the first. 1179 1180 Not Collective 1181 1182 Input Parameter: 1183 . n - number of values 1184 1185 Input/Output Parameters: 1186 + arr1 - array of integers to be sorted, modified on output 1187 - arr2 - array of integers to be reordered, modified on output 1188 1189 Level: intermediate 1190 1191 Notes: 1192 The arrays CANNOT overlap. 1193 1194 This function serves as an alternative to `PetscSortIntWithArray()`. While this function works for any array of integers it is 1195 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1196 recommended that the user benchmark their code to see which routine is fastest. 1197 1198 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()` 1199 @*/ 1200 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) 1201 { 1202 PetscFunctionBegin; 1203 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1204 PetscAssertPointer(arr1, 2); 1205 PetscAssertPointer(arr2, 3); 1206 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1207 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1208 PetscFunctionReturn(PETSC_SUCCESS); 1209 } 1210 1211 /*@ 1212 PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order. 1213 1214 Not Collective 1215 1216 Input Parameters: 1217 + n - number of values 1218 - arr - array of `PetscMPIInt` 1219 1220 Output Parameter: 1221 . arr - sorted array of integers 1222 1223 Level: intermediate 1224 1225 Notes: 1226 If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used. 1227 1228 This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is 1229 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1230 recommended that the user benchmark their code to see which routine is fastest. 1231 1232 .seealso: `PetscTimSort()`, `PetscSortMPIInt()` 1233 @*/ 1234 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) 1235 { 1236 PetscFunctionBegin; 1237 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1238 PetscAssertPointer(arr, 2); 1239 if (n < 64) { 1240 PetscCall(PetscSortMPIInt(n, arr)); 1241 } else { 1242 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1243 } 1244 PetscFunctionReturn(PETSC_SUCCESS); 1245 } 1246 1247 /*@ 1248 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt` 1249 array to match the first. 1250 1251 Not Collective 1252 1253 Input Parameter: 1254 . n - number of values 1255 1256 Input/Output Parameters: 1257 + arr1 - array of integers to be sorted, modified on output 1258 - arr2 - array of integers to be reordered, modified on output 1259 1260 Level: intermediate 1261 1262 Notes: 1263 The arrays CANNOT overlap. 1264 1265 This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is 1266 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1267 recommended that the user benchmark their code to see which routine is fastest. 1268 1269 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()` 1270 @*/ 1271 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) 1272 { 1273 PetscFunctionBegin; 1274 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1275 PetscAssertPointer(arr1, 2); 1276 PetscAssertPointer(arr2, 3); 1277 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1278 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /*@ 1283 PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order. 1284 1285 Not Collective 1286 1287 Input Parameters: 1288 + n - number of values 1289 - arr - array of `PetscReal` 1290 1291 Output Parameter: 1292 . arr - sorted array of integers 1293 1294 Level: intermediate 1295 1296 Notes: 1297 If the array is less than 64 entries long `PetscSortReal()` is automatically used. 1298 1299 This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is 1300 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1301 recommended that the user benchmark their code to see which routine is fastest. 1302 1303 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()` 1304 @*/ 1305 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) 1306 { 1307 PetscFunctionBegin; 1308 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1309 PetscAssertPointer(arr, 2); 1310 if (n < 64) { 1311 PetscCall(PetscSortReal(n, arr)); 1312 } else { 1313 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL)); 1314 } 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 1318 /*@ 1319 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second 1320 array of `PetscInt` to match the first. 1321 1322 Not Collective 1323 1324 Input Parameter: 1325 . n - number of values 1326 1327 Input/Output Parameters: 1328 + arr1 - array of `PetscReal` to be sorted, modified on output 1329 - arr2 - array of `PetscInt` to be reordered, modified on output 1330 1331 Level: intermediate 1332 1333 Notes: 1334 This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is 1335 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1336 recommended that the user benchmark their code to see which routine is fastest. 1337 1338 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()` 1339 @*/ 1340 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) 1341 { 1342 PetscFunctionBegin; 1343 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS); 1344 PetscAssertPointer(arr1, 2); 1345 PetscAssertPointer(arr2, 3); 1346 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1347 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL)); 1348 PetscFunctionReturn(PETSC_SUCCESS); 1349 } 1350