1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) { 5 PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right; 6 return (l < r) ? -1 : (l > r); 7 } 8 9 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) { 10 PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right; 11 return (l < r) ? -1 : (l > r); 12 } 13 14 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) { 15 PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right; 16 return (l < r) ? -1 : (l > r); 17 } 18 19 #define MIN_GALLOP_CONST_GLOBAL 8 20 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 21 typedef int (*CompFunc)(const void *, const void *, void *); 22 23 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 24 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__) 25 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) { 26 __builtin_memcpy(t, b, size); 27 __builtin_memmove(b, a, size); 28 __builtin_memcpy(a, t, size); 29 return; 30 } 31 32 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) { 33 __builtin_memcpy(t, ar, asize); 34 __builtin_memmove(ar, al, asize); 35 __builtin_memcpy(al, t, asize); 36 __builtin_memcpy(t, br, bsize); 37 __builtin_memmove(br, bl, bsize); 38 __builtin_memcpy(bl, t, bsize); 39 return; 40 } 41 42 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) { 43 __builtin_memcpy(dest, src, size); 44 return; 45 } 46 47 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) { 48 __builtin_memcpy(adest, asrc, asize); 49 __builtin_memcpy(bdest, bsrc, bsize); 50 return; 51 } 52 53 static inline void Petsc_memmove(char *dest, const char *src, size_t size) { 54 __builtin_memmove(dest, src, size); 55 return; 56 } 57 58 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) { 59 __builtin_memmove(adest, asrc, asize); 60 __builtin_memmove(bdest, bsrc, bsize); 61 return; 62 } 63 #else 64 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) { 65 PetscFunctionBegin; 66 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size)); 67 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size)); 68 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size)); 69 PetscFunctionReturnVoid(); 70 } 71 72 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) { 73 PetscFunctionBegin; 74 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize)); 75 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize)); 76 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize)); 77 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize)); 78 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize)); 79 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize)); 80 PetscFunctionReturnVoid(); 81 } 82 83 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) { 84 PetscFunctionBegin; 85 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size)); 86 PetscFunctionReturnVoid(); 87 } 88 89 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) { 90 PetscFunctionBegin; 91 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize)); 92 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize)); 93 PetscFunctionReturnVoid(); 94 } 95 96 static inline void Petsc_memmove(char *dest, const char *src, size_t size) { 97 PetscFunctionBegin; 98 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size)); 99 PetscFunctionReturnVoid(); 100 } 101 102 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) { 103 PetscFunctionBegin; 104 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize)); 105 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize)); 106 PetscFunctionReturnVoid(); 107 } 108 #endif 109 110 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 111 x. Output also inclusive. 112 113 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 114 115 {0,1,2,3,4,5} 116 117 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 118 */ 119 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) { 120 PetscInt last = l, k = 1, mid, cur = l + 1; 121 122 PetscFunctionBegin; 123 *m = l; 124 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l); 125 if ((*cmp)(x, arr + r * size, ctx) >= 0) { 126 *m = r; 127 PetscFunctionReturn(0); 128 } 129 if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(0); 130 while (PETSC_TRUE) { 131 if (PetscUnlikely(cur > r)) { 132 cur = r; 133 break; 134 } 135 if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break; 136 last = cur; 137 cur += (k <<= 1) + 1; 138 ++k; 139 } 140 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 141 while (cur > last + 1) { 142 mid = last + ((cur - last) >> 1); 143 if ((*cmp)(x, (arr) + mid * size, ctx) < 0) { 144 cur = mid; 145 } else { 146 last = mid; 147 } 148 } 149 *m = cur; 150 PetscFunctionReturn(0); 151 } 152 153 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 154 < x. Output also inclusive */ 155 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) { 156 PetscInt last = r, k = 1, mid, cur = r - 1; 157 158 PetscFunctionBegin; 159 *m = r; 160 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l); 161 if ((*cmp)(x, arr + l * size, ctx) <= 0) { 162 *m = l; 163 PetscFunctionReturn(0); 164 } 165 if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(0); 166 while (PETSC_TRUE) { 167 if (PetscUnlikely(cur < l)) { 168 cur = l; 169 break; 170 } 171 if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break; 172 last = cur; 173 cur -= (k <<= 1) + 1; 174 ++k; 175 } 176 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 177 while (last > cur + 1) { 178 mid = last - ((last - cur) >> 1); 179 if ((*cmp)(x, (arr) + mid * size, ctx) > 0) { 180 cur = mid; 181 } else { 182 last = mid; 183 } 184 } 185 *m = cur; 186 PetscFunctionReturn(0); 187 } 188 189 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 190 complete array, left is first index of left array, mid is first index of right array, right is last index of right 191 array */ 192 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) { 193 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 194 const PetscInt llen = mid - left; 195 196 PetscFunctionBegin; 197 Petsc_memcpy(tarr, arr + (left * size), llen * size); 198 while ((i < llen) && (j <= right)) { 199 if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) { 200 Petsc_memcpy(arr + (k * size), tarr + (i * size), size); 201 ++k; 202 gallopright = 0; 203 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 204 PetscInt l1, l2, diff1, diff2; 205 do { 206 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 207 /* search temp for right[j], can move up to that of temp into arr immediately */ 208 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 209 diff1 = l1 - i; 210 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 211 k += diff1; 212 i = l1; 213 /* search right for temp[i], can move up to that many of right into arr */ 214 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 215 diff2 = l2 - j; 216 Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size); 217 k += diff2; 218 j = l2; 219 if (i >= llen || j > right) break; 220 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 221 ++MIN_GALLOP_GLOBAL; 222 } 223 } else { 224 Petsc_memmove(arr + (k * size), arr + (j * size), size); 225 ++k; 226 gallopleft = 0; 227 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 228 PetscInt l1, l2, diff1, diff2; 229 do { 230 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 231 /* search right for temp[i], can move up to that many of right into arr */ 232 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2)); 233 diff2 = l2 - j; 234 Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size); 235 k += diff2; 236 j = l2; 237 /* search temp for right[j], can copy up to that of temp into arr immediately */ 238 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1)); 239 diff1 = l1 - i; 240 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size); 241 k += diff1; 242 i = l1; 243 if (i >= llen || j > right) break; 244 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 245 ++MIN_GALLOP_GLOBAL; 246 } 247 } 248 } 249 if (i < llen) { Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size); } 250 PetscFunctionReturn(0); 251 } 252 253 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) { 254 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 255 const PetscInt llen = mid - left; 256 257 PetscFunctionBegin; 258 Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize); 259 while ((i < llen) && (j <= right)) { 260 if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) { 261 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize); 262 ++k; 263 gallopright = 0; 264 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 265 PetscInt l1, l2, diff1, diff2; 266 do { 267 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 268 /* search temp for right[j], can move up to that of temp into arr immediately */ 269 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 270 diff1 = l1 - i; 271 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 272 k += diff1; 273 i = l1; 274 /* search right for temp[i], can move up to that many of right into arr */ 275 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 276 diff2 = l2 - j; 277 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 278 k += diff2; 279 j = l2; 280 if (i >= llen || j > right) break; 281 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 282 ++MIN_GALLOP_GLOBAL; 283 } 284 } else { 285 Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize); 286 ++k; 287 gallopleft = 0; 288 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 289 PetscInt l1, l2, diff1, diff2; 290 do { 291 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 292 /* search right for temp[i], can move up to that many of right into arr */ 293 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2)); 294 diff2 = l2 - j; 295 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize); 296 k += diff2; 297 j = l2; 298 /* search temp for right[j], can copy up to that of temp into arr immediately */ 299 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1)); 300 diff1 = l1 - i; 301 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize); 302 k += diff1; 303 i = l1; 304 if (i >= llen || j > right) break; 305 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 306 ++MIN_GALLOP_GLOBAL; 307 } 308 } 309 } 310 if (i < llen) { Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize); } 311 PetscFunctionReturn(0); 312 } 313 314 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 315 complete array, left is first index of left array, mid is first index of right array, right is last index of right 316 array */ 317 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) { 318 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 319 const PetscInt rlen = right - mid + 1; 320 321 PetscFunctionBegin; 322 Petsc_memcpy(tarr, (arr) + mid * size, rlen * size); 323 while ((i >= 0) && (j >= left)) { 324 if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) { 325 Petsc_memcpy((arr) + k * size, (tarr) + i * size, size); 326 --k; 327 gallopleft = 0; 328 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 329 PetscInt l1, l2, diff1, diff2; 330 do { 331 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 332 /* search temp for left[j], can copy up to that many of temp into arr */ 333 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 334 diff1 = i - l1; 335 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 336 k -= diff1; 337 i = l1; 338 /* search left for temp[i], can move up to that many of left up arr */ 339 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 340 diff2 = j - l2; 341 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 342 k -= diff2; 343 j = l2; 344 if (i < 0 || j < left) break; 345 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 346 ++MIN_GALLOP_GLOBAL; 347 } 348 } else { 349 Petsc_memmove((arr) + k * size, (arr) + j * size, size); 350 --k; 351 gallopright = 0; 352 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 353 PetscInt l1, l2, diff1, diff2; 354 do { 355 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 356 /* search left for temp[i], can move up to that many of left up arr */ 357 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2)); 358 diff2 = j - l2; 359 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size); 360 k -= diff2; 361 j = l2; 362 /* search temp for left[j], can copy up to that many of temp into arr */ 363 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1)); 364 diff1 = i - l1; 365 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size); 366 k -= diff1; 367 i = l1; 368 if (i < 0 || j < left) break; 369 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 370 ++MIN_GALLOP_GLOBAL; 371 } 372 } 373 } 374 if (i >= 0) { Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size); } 375 PetscFunctionReturn(0); 376 } 377 378 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) { 379 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0; 380 const PetscInt rlen = right - mid + 1; 381 382 PetscFunctionBegin; 383 Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize); 384 while ((i >= 0) && (j >= left)) { 385 if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) { 386 Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize); 387 --k; 388 gallopleft = 0; 389 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 390 PetscInt l1, l2, diff1, diff2; 391 do { 392 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 393 /* search temp for left[j], can copy up to that many of temp into arr */ 394 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 395 diff1 = i - l1; 396 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 397 k -= diff1; 398 i = l1; 399 /* search left for temp[i], can move up to that many of left up arr */ 400 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 401 diff2 = j - l2; 402 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 403 k -= diff2; 404 j = l2; 405 if (i < 0 || j < left) break; 406 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 407 ++MIN_GALLOP_GLOBAL; 408 } 409 } else { 410 Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize); 411 --k; 412 gallopright = 0; 413 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 414 PetscInt l1, l2, diff1, diff2; 415 do { 416 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 417 /* search left for temp[i], can move up to that many of left up arr */ 418 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2)); 419 diff2 = j - l2; 420 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize); 421 k -= diff2; 422 j = l2; 423 /* search temp for left[j], can copy up to that many of temp into arr */ 424 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1)); 425 diff1 = i - l1; 426 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize); 427 k -= diff1; 428 i = l1; 429 if (i < 0 || j < left) break; 430 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 431 ++MIN_GALLOP_GLOBAL; 432 } 433 } 434 } 435 if (i >= 0) { Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize); } 436 PetscFunctionReturn(0); 437 } 438 439 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 440 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 441 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) { 442 PetscInt i = start == left ? start + 1 : start; 443 444 PetscFunctionBegin; 445 for (; i <= right; ++i) { 446 PetscInt j = i - 1; 447 Petsc_memcpy(tarr, arr + (i * size), size); 448 while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) { 449 COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size); 450 --j; 451 } 452 Petsc_memcpy((arr) + (j + 1) * size, tarr, size); 453 } 454 PetscFunctionReturn(0); 455 } 456 457 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) { 458 PetscInt i = start == left ? start + 1 : start; 459 460 PetscFunctionBegin; 461 for (; i <= right; ++i) { 462 PetscInt j = i - 1; 463 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 464 while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) { 465 COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize); 466 --j; 467 } 468 Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize); 469 } 470 PetscFunctionReturn(0); 471 } 472 473 /* See PetscInsertionSort_Private */ 474 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) { 475 PetscInt i = start == left ? start + 1 : start; 476 477 PetscFunctionBegin; 478 for (; i <= right; ++i) { 479 PetscInt l = left, r = i; 480 Petsc_memcpy(tarr, arr + (i * size), size); 481 do { 482 const PetscInt m = l + ((r - l) >> 1); 483 if ((*cmp)(tarr, arr + (m * size), ctx) < 0) { 484 r = m; 485 } else { 486 l = m + 1; 487 } 488 } while (l < r); 489 Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size); 490 Petsc_memcpy(arr + (l * size), tarr, size); 491 } 492 PetscFunctionReturn(0); 493 } 494 495 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) { 496 PetscInt i = start == left ? start + 1 : start; 497 498 PetscFunctionBegin; 499 for (; i <= right; ++i) { 500 PetscInt l = left, r = i; 501 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize); 502 do { 503 const PetscInt m = l + ((r - l) >> 1); 504 if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) { 505 r = m; 506 } else { 507 l = m + 1; 508 } 509 } while (l < r); 510 Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize); 511 Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize); 512 } 513 PetscFunctionReturn(0); 514 } 515 516 typedef struct { 517 PetscInt size; 518 PetscInt start; 519 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 520 } PetscTimSortStack; 521 #else 522 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt)); 523 #endif 524 525 typedef struct { 526 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 527 size_t size; 528 size_t maxsize; 529 } PetscTimSortBuffer; 530 531 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) { 532 PetscFunctionBegin; 533 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0); 534 { 535 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 536 size_t newMax = PetscMin(newSize * newSize, buff->maxsize); 537 PetscCall(PetscFree(buff->ptr)); 538 PetscCall(PetscMalloc1(newMax, &buff->ptr)); 539 buff->size = newMax; 540 } 541 PetscFunctionReturn(0); 542 } 543 544 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) { 545 PetscFunctionBegin; 546 for (; stacksize; --stacksize) { 547 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 548 if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) { 549 PetscInt l, m = stack[stacksize].start, r; 550 /* Search A for B[0] insertion */ 551 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l)); 552 /* Search B for A[-1] insertion */ 553 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r)); 554 if (m - l <= r - m) { 555 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 556 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 557 } else { 558 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 559 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 560 } 561 } 562 /* Update A with merge */ 563 stack[stacksize - 1].size += stack[stacksize].size; 564 } 565 PetscFunctionReturn(0); 566 } 567 568 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) { 569 PetscFunctionBegin; 570 for (; stacksize; --stacksize) { 571 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 572 if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) { 573 PetscInt l, m = stack[stacksize].start, r; 574 /* Search A for B[0] insertion */ 575 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l)); 576 /* Search B for A[-1] insertion */ 577 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r)); 578 if (m - l <= r - m) { 579 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 580 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 581 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 582 } else { 583 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 584 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 585 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 586 } 587 } 588 /* Update A with merge */ 589 stack[stacksize - 1].size += stack[stacksize].size; 590 } 591 PetscFunctionReturn(0); 592 } 593 594 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) { 595 PetscInt i = *stacksize; 596 597 PetscFunctionBegin; 598 while (i) { 599 PetscInt l, m, r, itemp = i; 600 601 if (i == 1) { 602 /* A = stack[i-1], B = stack[i] */ 603 if (stack[i - 1].size < stack[i].size) { 604 /* if A[-1] <= B[0] then sorted */ 605 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 606 m = stack[i].start; 607 /* Search A for B[0] insertion */ 608 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l)); 609 /* Search B for A[-1] insertion */ 610 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r)); 611 if (m - l <= r - m) { 612 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 613 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 614 } else { 615 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 616 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 617 } 618 } 619 /* Update A with merge */ 620 stack[i - 1].size += stack[i].size; 621 --i; 622 } 623 } else { 624 /* i > 2, i.e. C exists 625 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 626 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 627 if (stack[i - 2].size < stack[i].size) { 628 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 629 if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) { 630 m = stack[i - 1].start; 631 /* Search A for B[0] insertion */ 632 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l)); 633 /* Search B for A[-1] insertion */ 634 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r)); 635 if (m - l <= r - m) { 636 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 637 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 638 } else { 639 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 640 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 641 } 642 } 643 /* Update A with merge */ 644 stack[i - 2].size += stack[i - 1].size; 645 /* Push C up the stack */ 646 stack[i - 1].start = stack[i].start; 647 stack[i - 1].size = stack[i].size; 648 } else { 649 /* merge C into B */ 650 mergeBC: 651 /* If B[-1] <= C[0] then... you know the drill */ 652 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) { 653 m = stack[i].start; 654 /* Search B for C[0] insertion */ 655 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l)); 656 /* Search C for B[-1] insertion */ 657 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r)); 658 if (m - l <= r - m) { 659 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size)); 660 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 661 } else { 662 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size)); 663 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 664 } 665 } 666 /* Update B with merge */ 667 stack[i - 1].size += stack[i].size; 668 } 669 --i; 670 } else if (stack[i - 1].size <= stack[i].size) { 671 /* merge C into B */ 672 goto mergeBC; 673 } 674 } 675 if (itemp == i) break; 676 } 677 *stacksize = i; 678 PetscFunctionReturn(0); 679 } 680 681 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) { 682 PetscInt i = *stacksize; 683 684 PetscFunctionBegin; 685 while (i) { 686 PetscInt l, m, r, itemp = i; 687 688 if (i == 1) { 689 /* A = stack[i-1], B = stack[i] */ 690 if (stack[i - 1].size < stack[i].size) { 691 /* if A[-1] <= B[0] then sorted */ 692 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 693 m = stack[i].start; 694 /* Search A for B[0] insertion */ 695 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l)); 696 /* Search B for A[-1] insertion */ 697 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r)); 698 if (m - l <= r - m) { 699 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 700 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 701 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 702 } else { 703 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 704 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 705 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 706 } 707 } 708 /* Update A with merge */ 709 stack[i - 1].size += stack[i].size; 710 --i; 711 } 712 } else { 713 /* i > 2, i.e. C exists 714 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 715 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) { 716 if (stack[i - 2].size < stack[i].size) { 717 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 718 if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) { 719 m = stack[i - 1].start; 720 /* Search A for B[0] insertion */ 721 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l)); 722 /* Search B for A[-1] insertion */ 723 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r)); 724 if (m - l <= r - m) { 725 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 726 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 727 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 728 } else { 729 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 730 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 731 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 732 } 733 } 734 /* Update A with merge */ 735 stack[i - 2].size += stack[i - 1].size; 736 /* Push C up the stack */ 737 stack[i - 1].start = stack[i].start; 738 stack[i - 1].size = stack[i].size; 739 } else { 740 /* merge C into B */ 741 mergeBC: 742 /* If B[-1] <= C[0] then... you know the drill */ 743 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) { 744 m = stack[i].start; 745 /* Search B for C[0] insertion */ 746 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l)); 747 /* Search C for B[-1] insertion */ 748 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r)); 749 if (m - l <= r - m) { 750 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize)); 751 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize)); 752 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 753 } else { 754 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize)); 755 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize)); 756 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 757 } 758 } 759 /* Update B with merge */ 760 stack[i - 1].size += stack[i].size; 761 } 762 --i; 763 } else if (stack[i - 1].size <= stack[i].size) { 764 /* merge C into B */ 765 goto mergeBC; 766 } 767 } 768 if (itemp == i) break; 769 } 770 *stacksize = i; 771 PetscFunctionReturn(0); 772 } 773 774 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 775 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 776 binary insertion sort or regulat insertion sort */ 777 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) { 778 const PetscInt re = PetscMin(runstart + minrun, n - 1); 779 PetscInt ri = runstart; 780 781 PetscFunctionBegin; 782 if (PetscUnlikely(runstart == n - 1)) { 783 *runend = runstart; 784 PetscFunctionReturn(0); 785 } 786 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 787 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) { 788 ++ri; 789 while (ri < n - 1) { 790 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break; 791 ++ri; 792 } 793 { 794 PetscInt lo = runstart, hi = ri; 795 for (; lo < hi; ++lo, --hi) { COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size); } 796 } 797 } else { 798 ++ri; 799 while (ri < n - 1) { 800 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break; 801 ++ri; 802 } 803 } 804 if (ri < re) { 805 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 806 binary search */ 807 if (ri - runstart <= minrun >> 1) { 808 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 809 PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re); 810 } else { 811 PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re); 812 } 813 *runend = re; 814 } else *runend = ri; 815 PetscFunctionReturn(0); 816 } 817 818 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) { 819 const PetscInt re = PetscMin(runstart + minrun, n - 1); 820 PetscInt ri = runstart; 821 822 PetscFunctionBegin; 823 if (PetscUnlikely(runstart == n - 1)) { 824 *runend = runstart; 825 PetscFunctionReturn(0); 826 } 827 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 828 if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) { 829 ++ri; 830 while (ri < n - 1) { 831 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break; 832 ++ri; 833 } 834 { 835 PetscInt lo = runstart, hi = ri; 836 for (; lo < hi; ++lo, --hi) { COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr); } 837 } 838 } else { 839 ++ri; 840 while (ri < n - 1) { 841 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break; 842 ++ri; 843 } 844 } 845 if (ri < re) { 846 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 847 binary search */ 848 if (ri - runstart <= minrun >> 1) { 849 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 850 PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re); 851 } else { 852 PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re); 853 } 854 *runend = re; 855 } else *runend = ri; 856 PetscFunctionReturn(0); 857 } 858 859 /*@C 860 PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm. 861 862 Not Collective 863 864 Input Parameters: 865 + n - number of values 866 . arr - array to be sorted 867 . size - size in bytes of the datatype held in arr 868 . cmp - function pointer to comparison function 869 - ctx - optional context to be passed to comparison function, NULL if not needed 870 871 Output Parameters: 872 . arr - sorted array 873 874 Notes: 875 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 876 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 877 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 878 do so it repeatedly triggers attempts throughout to merge adjacent runs. 879 880 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 881 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 882 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 883 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 884 likely all contain similar data. 885 886 Sample usage: 887 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference 888 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 889 may also 890 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 891 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 892 order 893 894 .vb 895 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 896 my_type l = *(my_type *) left, r = *(my_type *) right; 897 return (l < r) ? -1 : (l > r); 898 } 899 .ve 900 Note the context is unused here but you may use it to pass and subsequently access whatever information required 901 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 902 Then pass the function 903 .vb 904 PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx) 905 .ve 906 907 Fortran Notes: 908 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 909 returns result. For example 910 .vb 911 subroutine CompareIntegers(left,right,ctx,result) 912 implicit none 913 914 PetscInt,intent(in) :: left, right 915 type(UserCtx) :: ctx 916 integer,intent(out) :: result 917 918 if (left < right) then 919 result = -1 920 else if (left == right) then 921 result = 0 922 else 923 result = 1 924 end if 925 return 926 end subroutine CompareIntegers 927 .ve 928 929 References: 930 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 931 932 Level: developer 933 934 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()` 935 @*/ 936 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) { 937 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 938 PetscTimSortStack runstack[128]; 939 PetscTimSortBuffer buff; 940 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 941 It is so unlikely that this limit is reached that this is __never__ checked for */ 942 943 PetscFunctionBegin; 944 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 945 is a power of 2 or one plus a power of 2 */ 946 { 947 PetscInt t = n, r = 0; 948 /* r becomes 1 if the least significant bits contain at least one off bit */ 949 while (t >= 64) { 950 r |= t & 1; 951 t >>= 1; 952 } 953 minrun = t + r; 954 } 955 if (PetscDefined(USE_DEBUG)) { 956 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 957 if (n < 64) { 958 PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n)); 959 } else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 960 } 961 PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr)); 962 buff.size = (size_t)minrun * size; 963 buff.maxsize = (size_t)n * size; 964 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 965 while (runstart < n) { 966 /* Check if additional entries are at least partially ordered and build natural run */ 967 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend)); 968 runstack[stacksize].start = runstart; 969 runstack[stacksize].size = runend - runstart + 1; 970 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize)); 971 ++stacksize; 972 runstart = runend + 1; 973 } 974 /* Have been inside while, so discard last stacksize++ */ 975 --stacksize; 976 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize)); 977 PetscCall(PetscFree(buff.ptr)); 978 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 979 PetscFunctionReturn(0); 980 } 981 982 /*@C 983 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and 984 reorders a second array to match the first. The arrays need not be the same type. 985 986 Not Collective 987 988 Input Parameters: 989 + n - number of values 990 . asize - size in bytes of the datatype held in arr 991 . bsize - size in bytes of the datatype held in barr 992 . cmp - function pointer to comparison function 993 - ctx - optional context to be passed to comparison function, NULL if not needed 994 995 Input/Output Parameters: 996 + arr - array to be sorted, on output it is sorted 997 - barr - array to be reordered, on output it is reordered 998 999 Notes: 1000 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1001 overlap. 1002 1003 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1004 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1005 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1006 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1007 1008 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1009 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1010 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1011 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1012 likely all contain similar data. 1013 1014 Sample usage: 1015 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference 1016 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 1017 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1018 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1019 increasing order 1020 1021 .vb 1022 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 1023 my_type l = *(my_type *) left, r = *(my_type *) right; 1024 return (l < r) ? -1 : (l > r); 1025 } 1026 .ve 1027 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1028 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1029 Then pass the function 1030 .vb 1031 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx) 1032 .ve 1033 1034 Fortran Notes: 1035 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 1036 returns result. For example 1037 .vb 1038 subroutine CompareIntegers(left,right,ctx,result) 1039 implicit none 1040 1041 PetscInt,intent(in) :: left, right 1042 type(UserCtx) :: ctx 1043 integer,intent(out) :: result 1044 1045 if (left < right) then 1046 result = -1 1047 else if (left == right) then 1048 result = 0 1049 else 1050 result = 1 1051 end if 1052 return 1053 end subroutine CompareIntegers 1054 .ve 1055 1056 References: 1057 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 1058 1059 Level: developer 1060 1061 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()` 1062 @*/ 1063 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) { 1064 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1065 PetscTimSortStack runstack[128]; 1066 PetscTimSortBuffer abuff, bbuff; 1067 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1068 It is so unlikely that this limit is reached that this is __never__ checked for */ 1069 1070 PetscFunctionBegin; 1071 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1072 is a power of 2 or one plus a power of 2 */ 1073 { 1074 PetscInt t = n, r = 0; 1075 /* r becomes 1 if the least significant bits contain at least one off bit */ 1076 while (t >= 64) { 1077 r |= t & 1; 1078 t >>= 1; 1079 } 1080 minrun = t + r; 1081 } 1082 if (PetscDefined(USE_DEBUG)) { 1083 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 1084 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun); 1085 } 1086 PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr)); 1087 abuff.size = (size_t)minrun * asize; 1088 abuff.maxsize = (size_t)n * asize; 1089 PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr)); 1090 bbuff.size = (size_t)minrun * bsize; 1091 bbuff.maxsize = (size_t)n * bsize; 1092 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1093 while (runstart < n) { 1094 /* Check if additional entries are at least partially ordered and build natural run */ 1095 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend)); 1096 runstack[stacksize].start = runstart; 1097 runstack[stacksize].size = runend - runstart + 1; 1098 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize)); 1099 ++stacksize; 1100 runstart = runend + 1; 1101 } 1102 /* Have been inside while, so discard last stacksize++ */ 1103 --stacksize; 1104 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize)); 1105 PetscCall(PetscFree(abuff.ptr)); 1106 PetscCall(PetscFree(bbuff.ptr)); 1107 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1108 PetscFunctionReturn(0); 1109 } 1110 1111 /*@ 1112 PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order. 1113 1114 Not Collective 1115 1116 Input Parameters: 1117 + n - number of values 1118 - arr - array of integers 1119 1120 Output Parameters: 1121 . arr - sorted array of integers 1122 1123 Notes: 1124 If the array is less than 64 entries long PetscSortInt() is automatically used. 1125 1126 This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is 1127 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1128 recommended that the user benchmark their code to see which routine is fastest. 1129 1130 Level: intermediate 1131 1132 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()` 1133 @*/ 1134 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) { 1135 PetscFunctionBegin; 1136 if (n <= 1) PetscFunctionReturn(0); 1137 PetscValidIntPointer(arr, 2); 1138 if (n < 64) { 1139 PetscCall(PetscSortInt(n, arr)); 1140 } else { 1141 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 /*@ 1147 PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second 1148 array to match the first. 1149 1150 Not Collective 1151 1152 Input Parameter: 1153 . n - number of values 1154 1155 Input/Output Parameters: 1156 + arr1 - array of integers to be sorted, modified on output 1157 - arr2 - array of integers to be reordered, modified on output 1158 1159 Notes: 1160 The arrays CANNOT overlap. 1161 1162 This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is 1163 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1164 recommended that the user benchmark their code to see which routine is fastest. 1165 1166 Level: intermediate 1167 1168 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()` 1169 @*/ 1170 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) { 1171 PetscFunctionBegin; 1172 if (n <= 1) PetscFunctionReturn(0); 1173 PetscValidIntPointer(arr1, 2); 1174 PetscValidIntPointer(arr2, 3); 1175 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1176 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 /*@ 1181 PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order. 1182 1183 Not Collective 1184 1185 Input Parameters: 1186 + n - number of values 1187 - arr - array of PetscMPIInts 1188 1189 Output Parameters: 1190 . arr - sorted array of integers 1191 1192 Notes: 1193 If the array is less than 64 entries long PetscSortMPIInt() is automatically used. 1194 1195 This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is 1196 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1197 recommended that the user benchmark their code to see which routine is fastest. 1198 1199 Level: intermediate 1200 1201 .seealso: `PetscTimSort()`, `PetscSortMPIInt()` 1202 @*/ 1203 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) { 1204 PetscFunctionBegin; 1205 if (n <= 1) PetscFunctionReturn(0); 1206 PetscValidIntPointer(arr, 2); 1207 if (n < 64) { 1208 PetscCall(PetscSortMPIInt(n, arr)); 1209 } else { 1210 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1211 } 1212 PetscFunctionReturn(0); 1213 } 1214 1215 /*@ 1216 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second 1217 array to match the first. 1218 1219 Not Collective 1220 1221 Input Parameter: 1222 . n - number of values 1223 1224 Input/Output Parameters: 1225 . arr1 - array of integers to be sorted, modified on output 1226 - arr2 - array of integers to be reordered, modified on output 1227 1228 Notes: 1229 The arrays CANNOT overlap. 1230 1231 This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is 1232 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1233 recommended that the user benchmark their code to see which routine is fastest. 1234 1235 Level: intermediate 1236 1237 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()` 1238 @*/ 1239 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) { 1240 PetscFunctionBegin; 1241 if (n <= 1) PetscFunctionReturn(0); 1242 PetscValidIntPointer(arr1, 2); 1243 PetscValidIntPointer(arr2, 3); 1244 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1245 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /*@ 1250 PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order. 1251 1252 Not Collective 1253 1254 Input Parameters: 1255 + n - number of values 1256 - arr - array of PetscReals 1257 1258 Output Parameters: 1259 . arr - sorted array of integers 1260 1261 Notes: 1262 If the array is less than 64 entries long PetscSortReal() is automatically used. 1263 1264 This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is 1265 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1266 recommended that the user benchmark their code to see which routine is fastest. 1267 1268 Level: intermediate 1269 1270 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()` 1271 @*/ 1272 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) { 1273 PetscFunctionBegin; 1274 if (n <= 1) PetscFunctionReturn(0); 1275 PetscValidRealPointer(arr, 2); 1276 if (n < 64) { 1277 PetscCall(PetscSortReal(n, arr)); 1278 } else { 1279 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL)); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 /*@ 1285 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second 1286 array of PetscInts to match the first. 1287 1288 Not Collective 1289 1290 Input Parameter: 1291 . n - number of values 1292 1293 Input/Output Parameters: 1294 . arr1 - array of PetscReals to be sorted, modified on output 1295 - arr2 - array of PetscReals to be reordered, modified on output 1296 1297 Notes: 1298 This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is 1299 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1300 recommended that the user benchmark their code to see which routine is fastest. 1301 1302 Level: intermediate 1303 1304 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()` 1305 @*/ 1306 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) { 1307 PetscFunctionBegin; 1308 if (n <= 1) PetscFunctionReturn(0); 1309 PetscValidRealPointer(arr1, 2); 1310 PetscValidIntPointer(arr2, 3); 1311 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1312 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL)); 1313 PetscFunctionReturn(0); 1314 } 1315