1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5 { 6 PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right; 7 return (l < r) ? -1 : (l > r); 8 } 9 10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11 { 12 PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right; 13 return (l < r) ? -1 : (l > r); 14 } 15 16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17 { 18 PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right; 19 return (l < r) ? -1 : (l > r); 20 } 21 22 #define MIN_GALLOP_CONST_GLOBAL 8 23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 24 typedef int (*CompFunc)(const void *, const void *, void *); 25 26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__) 28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29 { 30 __builtin_memcpy(t, b, size); 31 __builtin_memmove(b, a, size); 32 __builtin_memcpy(a, t, size); 33 return; 34 } 35 36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 37 { 38 __builtin_memcpy(t, ar, asize); 39 __builtin_memmove(ar, al, asize); 40 __builtin_memcpy(al, t, asize); 41 __builtin_memcpy(t, br, bsize); 42 __builtin_memmove(br, bl, bsize); 43 __builtin_memcpy(bl, t, bsize); 44 return; 45 } 46 47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 48 { 49 __builtin_memcpy(dest, src, size); 50 return; 51 } 52 53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54 { 55 __builtin_memcpy(adest, asrc, asize); 56 __builtin_memcpy(bdest, bsrc, bsize); 57 return; 58 } 59 60 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 61 { 62 __builtin_memmove(dest, src, size); 63 return; 64 } 65 66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67 { 68 __builtin_memmove(adest, asrc, asize); 69 __builtin_memmove(bdest, bsrc, bsize); 70 return; 71 } 72 # else 73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74 { 75 PetscFunctionBegin; 76 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(t, b, size)); 77 PetscCallAbort(PETSC_COMM_SELF,PetscMemmove(b, a, size)); 78 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(a, t, size)); 79 PetscFunctionReturnVoid(); 80 } 81 82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 83 { 84 PetscFunctionBegin; 85 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(t, ar, asize)); 86 PetscCallAbort(PETSC_COMM_SELF,PetscMemmove(ar, al, asize)); 87 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(al, t, asize)); 88 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(t, br, bsize)); 89 PetscCallAbort(PETSC_COMM_SELF,PetscMemmove(br, bl, bsize)); 90 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(bl, t, bsize)); 91 PetscFunctionReturnVoid(); 92 } 93 94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size) 95 { 96 PetscFunctionBegin; 97 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(dest, src, size)); 98 PetscFunctionReturnVoid(); 99 } 100 101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 102 { 103 PetscFunctionBegin; 104 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(adest, asrc, asize)); 105 PetscCallAbort(PETSC_COMM_SELF,PetscMemcpy(bdest, bsrc, bsize)); 106 PetscFunctionReturnVoid(); 107 } 108 109 static inline void Petsc_memmove(char *dest, const char *src, size_t size) 110 { 111 PetscFunctionBegin; 112 PetscCallAbort(PETSC_COMM_SELF,PetscMemmove(dest, src, size)); 113 PetscFunctionReturnVoid(); 114 } 115 116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 117 { 118 PetscFunctionBegin; 119 PetscCallAbort(PETSC_COMM_SELF,PetscMemmove(adest, asrc, asize)); 120 PetscCallAbort(PETSC_COMM_SELF,PetscMemmove(bdest, bsrc, bsize)); 121 PetscFunctionReturnVoid(); 122 } 123 #endif 124 125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 126 x. Output also inclusive. 127 128 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 129 130 {0,1,2,3,4,5} 131 132 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 133 */ 134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 135 { 136 PetscInt last = l, k = 1, mid, cur = l+1; 137 138 PetscFunctionBegin; 139 *m = l; 140 PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft",r,l); 141 if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);} 142 if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0); 143 while (PETSC_TRUE) { 144 if (PetscUnlikely(cur > r)) {cur = r; break;} 145 if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break; 146 last = cur; 147 cur += (k <<= 1) + 1; 148 ++k; 149 } 150 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 151 while (cur > last + 1) { 152 mid = last + ((cur - last) >> 1); 153 if ((*cmp)(x, (arr)+mid*size, ctx) < 0) { 154 cur = mid; 155 } else { 156 last = mid; 157 } 158 } 159 *m = cur; 160 PetscFunctionReturn(0); 161 } 162 163 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 164 < x. Output also inclusive */ 165 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 166 { 167 PetscInt last = r, k = 1, mid, cur = r-1; 168 169 PetscFunctionBegin; 170 *m = r; 171 PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight",r,l); 172 if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);} 173 if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0); 174 while (PETSC_TRUE) { 175 if (PetscUnlikely(cur < l)) {cur = l; break;} 176 if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break; 177 last = cur; 178 cur -= (k <<= 1) + 1; 179 ++k; 180 } 181 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 182 while (last > cur + 1) { 183 mid = last - ((last - cur) >> 1); 184 if ((*cmp)(x, (arr)+mid*size, ctx) > 0) { 185 cur = mid; 186 } else { 187 last = mid; 188 } 189 } 190 *m = cur; 191 PetscFunctionReturn(0); 192 } 193 194 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 195 complete array, left is first index of left array, mid is first index of right array, right is last index of right 196 array */ 197 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 198 { 199 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 200 const PetscInt llen = mid-left; 201 202 PetscFunctionBegin; 203 Petsc_memcpy(tarr, arr+(left*size), llen*size); 204 while ((i < llen) && (j <= right)) { 205 if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) { 206 Petsc_memcpy(arr+(k*size), tarr+(i*size), size); 207 ++k; 208 gallopright = 0; 209 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 210 PetscInt l1, l2, diff1, diff2; 211 do { 212 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 213 /* search temp for right[j], can move up to that of temp into arr immediately */ 214 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1)); 215 diff1 = l1-i; 216 Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size); 217 k += diff1; 218 i = l1; 219 /* search right for temp[i], can move up to that many of right into arr */ 220 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2)); 221 diff2 = l2-j; 222 Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size); 223 k += diff2; 224 j = l2; 225 if (i >= llen || j > right) break; 226 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 227 ++MIN_GALLOP_GLOBAL; 228 } 229 } else { 230 Petsc_memmove(arr+(k*size), arr+(j*size), size); 231 ++k; 232 gallopleft = 0; 233 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 234 PetscInt l1, l2, diff1, diff2; 235 do { 236 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 237 /* search right for temp[i], can move up to that many of right into arr */ 238 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2)); 239 diff2 = l2-j; 240 Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size); 241 k += diff2; 242 j = l2; 243 /* search temp for right[j], can copy up to that of temp into arr immediately */ 244 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1)); 245 diff1 = l1-i; 246 Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size); 247 k += diff1; 248 i = l1; 249 if (i >= llen || j > right) break; 250 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 251 ++MIN_GALLOP_GLOBAL; 252 } 253 } 254 } 255 if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);} 256 PetscFunctionReturn(0); 257 } 258 259 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 260 { 261 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 262 const PetscInt llen = mid-left; 263 264 PetscFunctionBegin; 265 Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize); 266 while ((i < llen) && (j <= right)) { 267 if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) { 268 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize); 269 ++k; 270 gallopright = 0; 271 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 272 PetscInt l1, l2, diff1, diff2; 273 do { 274 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 275 /* search temp for right[j], can move up to that of temp into arr immediately */ 276 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1)); 277 diff1 = l1-i; 278 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize); 279 k += diff1; 280 i = l1; 281 /* search right for temp[i], can move up to that many of right into arr */ 282 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2)); 283 diff2 = l2-j; 284 Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize); 285 k += diff2; 286 j = l2; 287 if (i >= llen || j > right) break; 288 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 289 ++MIN_GALLOP_GLOBAL; 290 } 291 } else { 292 Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize); 293 ++k; 294 gallopleft = 0; 295 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 296 PetscInt l1, l2, diff1, diff2; 297 do { 298 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 299 /* search right for temp[i], can move up to that many of right into arr */ 300 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2)); 301 diff2 = l2-j; 302 Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize); 303 k += diff2; 304 j = l2; 305 /* search temp for right[j], can copy up to that of temp into arr immediately */ 306 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1)); 307 diff1 = l1-i; 308 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize); 309 k += diff1; 310 i = l1; 311 if (i >= llen || j > right) break; 312 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 313 ++MIN_GALLOP_GLOBAL; 314 } 315 } 316 } 317 if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);} 318 PetscFunctionReturn(0); 319 } 320 321 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 322 complete array, left is first index of left array, mid is first index of right array, right is last index of right 323 array */ 324 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 325 { 326 PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0; 327 const PetscInt rlen = right-mid+1; 328 329 PetscFunctionBegin; 330 Petsc_memcpy(tarr, (arr)+mid*size, rlen*size); 331 while ((i >= 0) && (j >= left)) { 332 if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) { 333 Petsc_memcpy((arr)+k*size, (tarr)+i*size, size); 334 --k; 335 gallopleft = 0; 336 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 337 PetscInt l1, l2, diff1, diff2; 338 do { 339 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 340 /* search temp for left[j], can copy up to that many of temp into arr */ 341 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1)); 342 diff1 = i-l1; 343 Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size); 344 k -= diff1; 345 i = l1; 346 /* search left for temp[i], can move up to that many of left up arr */ 347 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2)); 348 diff2 = j-l2; 349 Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size); 350 k -= diff2; 351 j = l2; 352 if (i < 0 || j < left) break; 353 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 354 ++MIN_GALLOP_GLOBAL; 355 } 356 } else { 357 Petsc_memmove((arr)+k*size, (arr)+j*size, size); 358 --k; 359 gallopright = 0; 360 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 361 PetscInt l1, l2, diff1, diff2; 362 do { 363 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 364 /* search left for temp[i], can move up to that many of left up arr */ 365 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2)); 366 diff2 = j-l2; 367 Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size); 368 k -= diff2; 369 j = l2; 370 /* search temp for left[j], can copy up to that many of temp into arr */ 371 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1)); 372 diff1 = i-l1; 373 Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size); 374 k -= diff1; 375 i = l1; 376 if (i < 0 || j < left) break; 377 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 378 ++MIN_GALLOP_GLOBAL; 379 } 380 } 381 } 382 if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);} 383 PetscFunctionReturn(0); 384 } 385 386 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 387 { 388 PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0; 389 const PetscInt rlen = right-mid+1; 390 391 PetscFunctionBegin; 392 Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize); 393 while ((i >= 0) && (j >= left)) { 394 if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) { 395 Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize); 396 --k; 397 gallopleft = 0; 398 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 399 PetscInt l1, l2, diff1, diff2; 400 do { 401 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 402 /* search temp for left[j], can copy up to that many of temp into arr */ 403 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1)); 404 diff1 = i-l1; 405 Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize); 406 k -= diff1; 407 i = l1; 408 /* search left for temp[i], can move up to that many of left up arr */ 409 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2)); 410 diff2 = j-l2; 411 Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize); 412 k -= diff2; 413 j = l2; 414 if (i < 0 || j < left) break; 415 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 416 ++MIN_GALLOP_GLOBAL; 417 } 418 } else { 419 Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize); 420 --k; 421 gallopright = 0; 422 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 423 PetscInt l1, l2, diff1, diff2; 424 do { 425 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 426 /* search left for temp[i], can move up to that many of left up arr */ 427 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2)); 428 diff2 = j-l2; 429 Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize); 430 k -= diff2; 431 j = l2; 432 /* search temp for left[j], can copy up to that many of temp into arr */ 433 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1)); 434 diff1 = i-l1; 435 Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize); 436 k -= diff1; 437 i = l1; 438 if (i < 0 || j < left) break; 439 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 440 ++MIN_GALLOP_GLOBAL; 441 } 442 } 443 } 444 if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);} 445 PetscFunctionReturn(0); 446 } 447 448 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 449 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 450 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 451 { 452 PetscInt i = start == left ? start+1 : start; 453 454 PetscFunctionBegin; 455 for (; i <= right; ++i) { 456 PetscInt j = i-1; 457 Petsc_memcpy(tarr, arr+(i*size), size); 458 while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) { 459 COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size); 460 --j; 461 } 462 Petsc_memcpy((arr)+(j+1)*size, tarr, size); 463 } 464 PetscFunctionReturn(0); 465 } 466 467 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 468 { 469 PetscInt i = start == left ? start+1 : start; 470 471 PetscFunctionBegin; 472 for (; i <= right; ++i) { 473 PetscInt j = i-1; 474 Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize); 475 while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) { 476 COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize); 477 --j; 478 } 479 Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 /* See PetscInsertionSort_Private */ 485 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 486 { 487 PetscInt i = start == left ? start+1 : start; 488 489 PetscFunctionBegin; 490 for (; i <= right; ++i) { 491 PetscInt l = left, r = i; 492 Petsc_memcpy(tarr, arr+(i*size), size); 493 do { 494 const PetscInt m = l + ((r - l) >> 1); 495 if ((*cmp)(tarr, arr+(m*size), ctx) < 0) { 496 r = m; 497 } else { 498 l = m + 1; 499 } 500 } while (l < r); 501 Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size); 502 Petsc_memcpy(arr+(l*size), tarr, size); 503 } 504 PetscFunctionReturn(0); 505 } 506 507 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 508 { 509 PetscInt i = start == left ? start+1 : start; 510 511 PetscFunctionBegin; 512 for (; i <= right; ++i) { 513 PetscInt l = left, r = i; 514 Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize); 515 do { 516 const PetscInt m = l + ((r - l) >> 1); 517 if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) { 518 r = m; 519 } else { 520 l = m + 1; 521 } 522 } while (l < r); 523 Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize); 524 Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 typedef struct { 530 PetscInt size; 531 PetscInt start; 532 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 533 } PetscTimSortStack; 534 #else 535 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt)); 536 #endif 537 538 typedef struct { 539 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 540 size_t size; 541 size_t maxsize; 542 } PetscTimSortBuffer; 543 544 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) 545 { 546 PetscFunctionBegin; 547 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0); 548 { 549 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 550 size_t newMax = PetscMin(newSize*newSize, buff->maxsize); 551 PetscCall(PetscFree(buff->ptr)); 552 PetscCall(PetscMalloc1(newMax, &buff->ptr)); 553 buff->size = newMax; 554 } 555 PetscFunctionReturn(0); 556 } 557 558 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) 559 { 560 PetscFunctionBegin; 561 for (;stacksize; --stacksize) { 562 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 563 if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) { 564 PetscInt l, m = stack[stacksize].start, r; 565 /* Search A for B[0] insertion */ 566 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l)); 567 /* Search B for A[-1] insertion */ 568 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r)); 569 if (m-l <= r-m) { 570 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size)); 571 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 572 } else { 573 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size)); 574 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 575 } 576 } 577 /* Update A with merge */ 578 stack[stacksize-1].size += stack[stacksize].size; 579 } 580 PetscFunctionReturn(0); 581 } 582 583 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) 584 { 585 PetscFunctionBegin; 586 for (;stacksize; --stacksize) { 587 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 588 if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) { 589 PetscInt l, m = stack[stacksize].start, r; 590 /* Search A for B[0] insertion */ 591 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l)); 592 /* Search B for A[-1] insertion */ 593 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r)); 594 if (m-l <= r-m) { 595 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize)); 596 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize)); 597 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 598 } else { 599 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize)); 600 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize)); 601 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 602 } 603 } 604 /* Update A with merge */ 605 stack[stacksize-1].size += stack[stacksize].size; 606 } 607 PetscFunctionReturn(0); 608 } 609 610 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) 611 { 612 PetscInt i = *stacksize; 613 614 PetscFunctionBegin; 615 while (i) { 616 PetscInt l, m, r, itemp = i; 617 618 if (i == 1) { 619 /* A = stack[i-1], B = stack[i] */ 620 if (stack[i-1].size < stack[i].size) { 621 /* if A[-1] <= B[0] then sorted */ 622 if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) { 623 m = stack[i].start; 624 /* Search A for B[0] insertion */ 625 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l)); 626 /* Search B for A[-1] insertion */ 627 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r)); 628 if (m-l <= r-m) { 629 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size)); 630 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 631 } else { 632 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size)); 633 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 634 } 635 } 636 /* Update A with merge */ 637 stack[i-1].size += stack[i].size; 638 --i; 639 } 640 } else { 641 /* i > 2, i.e. C exists 642 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 643 if (stack[i-2].size <= stack[i-1].size+stack[i].size) { 644 if (stack[i-2].size < stack[i].size) { 645 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 646 if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) { 647 m = stack[i-1].start; 648 /* Search A for B[0] insertion */ 649 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l)); 650 /* Search B for A[-1] insertion */ 651 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r)); 652 if (m-l <= r-m) { 653 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size)); 654 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 655 } else { 656 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size)); 657 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 658 } 659 } 660 /* Update A with merge */ 661 stack[i-2].size += stack[i-1].size; 662 /* Push C up the stack */ 663 stack[i-1].start = stack[i].start; 664 stack[i-1].size = stack[i].size; 665 } else { 666 /* merge C into B */ 667 mergeBC: 668 /* If B[-1] <= C[0] then... you know the drill */ 669 if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) { 670 m = stack[i].start; 671 /* Search B for C[0] insertion */ 672 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l)); 673 /* Search C for B[-1] insertion */ 674 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r)); 675 if (m-l <= r-m) { 676 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size)); 677 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 678 } else { 679 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size)); 680 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r)); 681 } 682 } 683 /* Update B with merge */ 684 stack[i-1].size += stack[i].size; 685 } 686 --i; 687 } else if (stack[i-1].size <= stack[i].size) { 688 /* merge C into B */ 689 goto mergeBC; 690 } 691 } 692 if (itemp == i) break; 693 } 694 *stacksize = i; 695 PetscFunctionReturn(0); 696 } 697 698 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) 699 { 700 PetscInt i = *stacksize; 701 702 PetscFunctionBegin; 703 while (i) { 704 PetscInt l, m, r, itemp = i; 705 706 if (i == 1) { 707 /* A = stack[i-1], B = stack[i] */ 708 if (stack[i-1].size < stack[i].size) { 709 /* if A[-1] <= B[0] then sorted */ 710 if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) { 711 m = stack[i].start; 712 /* Search A for B[0] insertion */ 713 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l)); 714 /* Search B for A[-1] insertion */ 715 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r)); 716 if (m-l <= r-m) { 717 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize)); 718 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize)); 719 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 720 } else { 721 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize)); 722 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize)); 723 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 724 } 725 } 726 /* Update A with merge */ 727 stack[i-1].size += stack[i].size; 728 --i; 729 } 730 } else { 731 /* i > 2, i.e. C exists 732 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 733 if (stack[i-2].size <= stack[i-1].size+stack[i].size) { 734 if (stack[i-2].size < stack[i].size) { 735 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 736 if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) { 737 m = stack[i-1].start; 738 /* Search A for B[0] insertion */ 739 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l)); 740 /* Search B for A[-1] insertion */ 741 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r)); 742 if (m-l <= r-m) { 743 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize)); 744 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize)); 745 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 746 } else { 747 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize)); 748 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize)); 749 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 750 } 751 } 752 /* Update A with merge */ 753 stack[i-2].size += stack[i-1].size; 754 /* Push C up the stack */ 755 stack[i-1].start = stack[i].start; 756 stack[i-1].size = stack[i].size; 757 } else { 758 /* merge C into B */ 759 mergeBC: 760 /* If B[-1] <= C[0] then... you know the drill */ 761 if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) { 762 m = stack[i].start; 763 /* Search B for C[0] insertion */ 764 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l)); 765 /* Search C for B[-1] insertion */ 766 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r)); 767 if (m-l <= r-m) { 768 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize)); 769 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize)); 770 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 771 } else { 772 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize)); 773 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize)); 774 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r)); 775 } 776 } 777 /* Update B with merge */ 778 stack[i-1].size += stack[i].size; 779 } 780 --i; 781 } else if (stack[i-1].size <= stack[i].size) { 782 /* merge C into B */ 783 goto mergeBC; 784 } 785 } 786 if (itemp == i) break; 787 } 788 *stacksize = i; 789 PetscFunctionReturn(0); 790 } 791 792 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 793 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 794 binary insertion sort or regulat insertion sort */ 795 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 796 { 797 const PetscInt re = PetscMin(runstart+minrun, n-1); 798 PetscInt ri = runstart; 799 800 PetscFunctionBegin; 801 if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);} 802 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 803 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) { 804 ++ri; 805 while (ri < n-1) { 806 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break; 807 ++ri; 808 } 809 { 810 PetscInt lo = runstart, hi = ri; 811 for (; lo < hi; ++lo, --hi) { 812 COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size); 813 } 814 } 815 } else { 816 ++ri; 817 while (ri < n-1) { 818 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break; 819 ++ri; 820 } 821 } 822 if (ri < re) { 823 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 824 binary search */ 825 if (ri-runstart <= minrun >> 1) { 826 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 827 PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re); 828 } else { 829 PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re); 830 } 831 *runend = re; 832 } else *runend = ri; 833 PetscFunctionReturn(0); 834 } 835 836 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 837 { 838 const PetscInt re = PetscMin(runstart+minrun, n-1); 839 PetscInt ri = runstart; 840 841 PetscFunctionBegin; 842 if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);} 843 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 844 if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) { 845 ++ri; 846 while (ri < n-1) { 847 if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break; 848 ++ri; 849 } 850 { 851 PetscInt lo = runstart, hi = ri; 852 for (; lo < hi; ++lo, --hi) { 853 COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr); 854 } 855 } 856 } else { 857 ++ri; 858 while (ri < n-1) { 859 if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break; 860 ++ri; 861 } 862 } 863 if (ri < re) { 864 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 865 binary search */ 866 if (ri-runstart <= minrun >> 1) { 867 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 868 PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re); 869 } else { 870 PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re); 871 } 872 *runend = re; 873 } else *runend = ri; 874 PetscFunctionReturn(0); 875 } 876 877 /*@C 878 PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm. 879 880 Not Collective 881 882 Input Parameters: 883 + n - number of values 884 . arr - array to be sorted 885 . size - size in bytes of the datatype held in arr 886 . cmp - function pointer to comparison function 887 - ctx - optional context to be passed to comparison function, NULL if not needed 888 889 Output Parameters: 890 . arr - sorted array 891 892 Notes: 893 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 894 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 895 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 896 do so it repeatedly triggers attempts throughout to merge adjacent runs. 897 898 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 899 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 900 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 901 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 902 likely all contain similar data. 903 904 Sample usage: 905 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference 906 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 907 may also 908 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 909 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 910 order 911 912 .vb 913 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 914 my_type l = *(my_type *) left, r = *(my_type *) right; 915 return (l < r) ? -1 : (l > r); 916 } 917 .ve 918 Note the context is unused here but you may use it to pass and subsequently access whatever information required 919 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 920 Then pass the function 921 .vb 922 PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx) 923 .ve 924 925 Fortran Notes: 926 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 927 returns result. For example 928 .vb 929 subroutine CompareIntegers(left,right,ctx,result) 930 implicit none 931 932 PetscInt,intent(in) :: left, right 933 type(UserCtx) :: ctx 934 integer,intent(out) :: result 935 936 if (left < right) then 937 result = -1 938 else if (left == right) then 939 result = 0 940 else 941 result = 1 942 end if 943 return 944 end subroutine CompareIntegers 945 .ve 946 947 References: 948 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 949 950 Level: developer 951 952 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()` 953 @*/ 954 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) 955 { 956 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 957 PetscTimSortStack runstack[128]; 958 PetscTimSortBuffer buff; 959 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 960 It is so unlikely that this limit is reached that this is __never__ checked for */ 961 962 PetscFunctionBegin; 963 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 964 is a power of 2 or one plus a power of 2 */ 965 { 966 PetscInt t = n, r = 0; 967 /* r becomes 1 if the least significant bits contain at least one off bit */ 968 while (t >= 64) { 969 r |= t & 1; 970 t >>= 1; 971 } 972 minrun = t + r; 973 } 974 if (PetscDefined(USE_DEBUG)) { 975 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 976 if (n < 64) { 977 PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n)); 978 } else PetscCheck(!(minrun < 32) && !(minrun > 65),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %" PetscInt_FMT " not in range (32,65)",minrun); 979 } 980 PetscCall(PetscMalloc1((size_t) minrun*size, &buff.ptr)); 981 buff.size = (size_t) minrun*size; 982 buff.maxsize = (size_t) n*size; 983 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 984 while (runstart < n) { 985 /* Check if additional entries are at least partially ordered and build natural run */ 986 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend)); 987 runstack[stacksize].start = runstart; 988 runstack[stacksize].size = runend-runstart+1; 989 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize)); 990 ++stacksize; 991 runstart = runend+1; 992 } 993 /* Have been inside while, so discard last stacksize++ */ 994 --stacksize; 995 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize)); 996 PetscCall(PetscFree(buff.ptr)); 997 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 998 PetscFunctionReturn(0); 999 } 1000 1001 /*@C 1002 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and 1003 reorders a second array to match the first. The arrays need not be the same type. 1004 1005 Not Collective 1006 1007 Input Parameters: 1008 + n - number of values 1009 . asize - size in bytes of the datatype held in arr 1010 . bsize - size in bytes of the datatype held in barr 1011 . cmp - function pointer to comparison function 1012 - ctx - optional context to be passed to comparison function, NULL if not needed 1013 1014 Input/Output Parameters: 1015 + arr - array to be sorted, on output it is sorted 1016 - barr - array to be reordered, on output it is reordered 1017 1018 Notes: 1019 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1020 overlap. 1021 1022 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1023 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1024 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1025 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1026 1027 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1028 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1029 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1030 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1031 likely all contain similar data. 1032 1033 Sample usage: 1034 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference 1035 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 1036 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1037 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1038 increasing order 1039 1040 .vb 1041 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 1042 my_type l = *(my_type *) left, r = *(my_type *) right; 1043 return (l < r) ? -1 : (l > r); 1044 } 1045 .ve 1046 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1047 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1048 Then pass the function 1049 .vb 1050 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx) 1051 .ve 1052 1053 Fortran Notes: 1054 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 1055 returns result. For example 1056 .vb 1057 subroutine CompareIntegers(left,right,ctx,result) 1058 implicit none 1059 1060 PetscInt,intent(in) :: left, right 1061 type(UserCtx) :: ctx 1062 integer,intent(out) :: result 1063 1064 if (left < right) then 1065 result = -1 1066 else if (left == right) then 1067 result = 0 1068 else 1069 result = 1 1070 end if 1071 return 1072 end subroutine CompareIntegers 1073 .ve 1074 1075 References: 1076 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt 1077 1078 Level: developer 1079 1080 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()` 1081 @*/ 1082 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) 1083 { 1084 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1085 PetscTimSortStack runstack[128]; 1086 PetscTimSortBuffer abuff, bbuff; 1087 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1088 It is so unlikely that this limit is reached that this is __never__ checked for */ 1089 1090 PetscFunctionBegin; 1091 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1092 is a power of 2 or one plus a power of 2 */ 1093 { 1094 PetscInt t = n, r = 0; 1095 /* r becomes 1 if the least significant bits contain at least one off bit */ 1096 while (t >= 64) { 1097 r |= t & 1; 1098 t >>= 1; 1099 } 1100 minrun = t + r; 1101 } 1102 if (PetscDefined(USE_DEBUG)) { 1103 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun)); 1104 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %" PetscInt_FMT " not in range (32,65)",minrun); 1105 } 1106 PetscCall(PetscMalloc1((size_t) minrun*asize, &abuff.ptr)); 1107 abuff.size = (size_t) minrun*asize; 1108 abuff.maxsize = (size_t) n*asize; 1109 PetscCall(PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr)); 1110 bbuff.size = (size_t) minrun*bsize; 1111 bbuff.maxsize = (size_t) n*bsize; 1112 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1113 while (runstart < n) { 1114 /* Check if additional entries are at least partially ordered and build natural run */ 1115 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend)); 1116 runstack[stacksize].start = runstart; 1117 runstack[stacksize].size = runend-runstart+1; 1118 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize)); 1119 ++stacksize; 1120 runstart = runend+1; 1121 } 1122 /* Have been inside while, so discard last stacksize++ */ 1123 --stacksize; 1124 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize)); 1125 PetscCall(PetscFree(abuff.ptr)); 1126 PetscCall(PetscFree(bbuff.ptr)); 1127 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1128 PetscFunctionReturn(0); 1129 } 1130 1131 /*@ 1132 PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order. 1133 1134 Not Collective 1135 1136 Input Parameters: 1137 + n - number of values 1138 - arr - array of integers 1139 1140 Output Parameters: 1141 . arr - sorted array of integers 1142 1143 Notes: 1144 If the array is less than 64 entries long PetscSortInt() is automatically used. 1145 1146 This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is 1147 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1148 recommended that the user benchmark their code to see which routine is fastest. 1149 1150 Level: intermediate 1151 1152 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()` 1153 @*/ 1154 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) 1155 { 1156 PetscFunctionBegin; 1157 if (n <= 1) PetscFunctionReturn(0); 1158 PetscValidIntPointer(arr,2); 1159 if (n < 64) { 1160 PetscCall(PetscSortInt(n, arr)); 1161 } else { 1162 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 /*@ 1168 PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second 1169 array to match the first. 1170 1171 Not Collective 1172 1173 Input Parameter: 1174 . n - number of values 1175 1176 Input/Output Parameters: 1177 + arr1 - array of integers to be sorted, modified on output 1178 - arr2 - array of integers to be reordered, modified on output 1179 1180 Notes: 1181 The arrays CANNOT overlap. 1182 1183 This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is 1184 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1185 recommended that the user benchmark their code to see which routine is fastest. 1186 1187 Level: intermediate 1188 1189 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()` 1190 @*/ 1191 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) 1192 { 1193 PetscFunctionBegin; 1194 if (n <= 1) PetscFunctionReturn(0); 1195 PetscValidIntPointer(arr1,2); 1196 PetscValidIntPointer(arr2,3); 1197 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1198 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL)); 1199 PetscFunctionReturn(0); 1200 } 1201 1202 /*@ 1203 PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order. 1204 1205 Not Collective 1206 1207 Input Parameters: 1208 + n - number of values 1209 - arr - array of PetscMPIInts 1210 1211 Output Parameters: 1212 . arr - sorted array of integers 1213 1214 Notes: 1215 If the array is less than 64 entries long PetscSortMPIInt() is automatically used. 1216 1217 This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is 1218 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1219 recommended that the user benchmark their code to see which routine is fastest. 1220 1221 Level: intermediate 1222 1223 .seealso: `PetscTimSort()`, `PetscSortMPIInt()` 1224 @*/ 1225 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) 1226 { 1227 PetscFunctionBegin; 1228 if (n <= 1) PetscFunctionReturn(0); 1229 PetscValidIntPointer(arr,2); 1230 if (n < 64) { 1231 PetscCall(PetscSortMPIInt(n, arr)); 1232 } else { 1233 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 /*@ 1239 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second 1240 array to match the first. 1241 1242 Not Collective 1243 1244 Input Parameter: 1245 . n - number of values 1246 1247 Input/Output Parameters: 1248 . arr1 - array of integers to be sorted, modified on output 1249 - arr2 - array of integers to be reordered, modified on output 1250 1251 Notes: 1252 The arrays CANNOT overlap. 1253 1254 This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is 1255 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1256 recommended that the user benchmark their code to see which routine is fastest. 1257 1258 Level: intermediate 1259 1260 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()` 1261 @*/ 1262 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) 1263 { 1264 PetscFunctionBegin; 1265 if (n <= 1) PetscFunctionReturn(0); 1266 PetscValidIntPointer(arr1,2); 1267 PetscValidIntPointer(arr2,3); 1268 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1269 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL)); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 /*@ 1274 PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order. 1275 1276 Not Collective 1277 1278 Input Parameters: 1279 + n - number of values 1280 - arr - array of PetscReals 1281 1282 Output Parameters: 1283 . arr - sorted array of integers 1284 1285 Notes: 1286 If the array is less than 64 entries long PetscSortReal() is automatically used. 1287 1288 This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is 1289 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1290 recommended that the user benchmark their code to see which routine is fastest. 1291 1292 Level: intermediate 1293 1294 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()` 1295 @*/ 1296 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) 1297 { 1298 PetscFunctionBegin; 1299 if (n <= 1) PetscFunctionReturn(0); 1300 PetscValidRealPointer(arr,2); 1301 if (n < 64) { 1302 PetscCall(PetscSortReal(n, arr)); 1303 } else { 1304 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL)); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 /*@ 1310 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second 1311 array of PetscInts to match the first. 1312 1313 Not Collective 1314 1315 Input Parameter: 1316 . n - number of values 1317 1318 Input/Output Parameters: 1319 . arr1 - array of PetscReals to be sorted, modified on output 1320 - arr2 - array of PetscReals to be reordered, modified on output 1321 1322 Notes: 1323 This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is 1324 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1325 recommended that the user benchmark their code to see which routine is fastest. 1326 1327 Level: intermediate 1328 1329 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()` 1330 @*/ 1331 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) 1332 { 1333 PetscFunctionBegin; 1334 if (n <= 1) PetscFunctionReturn(0); 1335 PetscValidRealPointer(arr1,2); 1336 PetscValidIntPointer(arr2,3); 1337 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1338 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL)); 1339 PetscFunctionReturn(0); 1340 } 1341