1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 5 { 6 PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right; 7 return (l < r) ? -1 : (l > r); 8 } 9 10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 11 { 12 PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right; 13 return (l < r) ? -1 : (l > r); 14 } 15 16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx) 17 { 18 PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right; 19 return (l < r) ? -1 : (l > r); 20 } 21 22 #define MIN_GALLOP_CONST_GLOBAL 8 23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 24 typedef int (*CompFunc)(const void *, const void *, void *); 25 26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */ 27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__) 28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size) 29 { 30 __builtin_memcpy(t, b, size); 31 __builtin_memmove(b, a, size); 32 __builtin_memcpy(a, t, size); 33 return; 34 } 35 36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 37 { 38 __builtin_memcpy(t, ar, asize); 39 __builtin_memmove(ar, al, asize); 40 __builtin_memcpy(al, t, asize); 41 __builtin_memcpy(t, br, bsize); 42 __builtin_memmove(br, bl, bsize); 43 __builtin_memcpy(bl, t, bsize); 44 return; 45 } 46 47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size) 48 { 49 __builtin_memcpy(dest, src, size); 50 return; 51 } 52 53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 54 { 55 __builtin_memcpy(adest, asrc, asize); 56 __builtin_memcpy(bdest, bsrc, bsize); 57 return; 58 } 59 60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size) 61 { 62 __builtin_memmove(dest, src, size); 63 return; 64 } 65 66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 67 { 68 __builtin_memmove(adest, asrc, asize); 69 __builtin_memmove(bdest, bsrc, bsize); 70 return; 71 } 72 # else 73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size) 74 { 75 PetscErrorCode ierr; 76 PetscFunctionBegin; 77 ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr); 78 ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr); 79 ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr); 80 PetscFunctionReturnVoid(); 81 } 82 83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t) 84 { 85 PetscErrorCode ierr; 86 PetscFunctionBegin; 87 ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr); 88 ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr); 89 ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr); 90 ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr); 91 ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr); 92 ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr); 93 PetscFunctionReturnVoid(); 94 } 95 96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size) 97 { 98 PetscErrorCode ierr; 99 PetscFunctionBegin; 100 ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr); 101 PetscFunctionReturnVoid(); 102 } 103 104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 105 { 106 PetscErrorCode ierr; 107 PetscFunctionBegin; 108 ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr); 109 ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr); 110 PetscFunctionReturnVoid(); 111 } 112 113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size) 114 { 115 PetscErrorCode ierr; 116 PetscFunctionBegin; 117 ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr); 118 PetscFunctionReturnVoid(); 119 } 120 121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize) 122 { 123 PetscErrorCode ierr; 124 PetscFunctionBegin; 125 ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr); 126 ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr); 127 PetscFunctionReturnVoid(); 128 } 129 #endif 130 131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] > 132 x. Output also inclusive. 133 134 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array: 135 136 {0,1,2,3,4,5} 137 138 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6. 139 */ 140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 141 { 142 PetscInt last = l, k = 1, mid, cur = l+1; 143 144 PetscFunctionBegin; 145 *m = l; 146 if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l); 147 if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);} 148 if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0); 149 while (PETSC_TRUE) { 150 if (PetscUnlikely(cur > r)) {cur = r; break;} 151 if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break; 152 last = cur; 153 cur += (k <<= 1) + 1; 154 ++k; 155 } 156 /* standard binary search but take last 0 mid 0 cur 1 into account*/ 157 while (cur > last + 1) { 158 mid = last + ((cur - last) >> 1); 159 if ((*cmp)(x, (arr)+mid*size, ctx) < 0) { 160 cur = mid; 161 } else { 162 last = mid; 163 } 164 } 165 *m = cur; 166 PetscFunctionReturn(0); 167 } 168 169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m] 170 < x. Output also inclusive */ 171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m) 172 { 173 PetscInt last = r, k = 1, mid, cur = r-1; 174 175 PetscFunctionBegin; 176 *m = r; 177 if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l); 178 if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);} 179 if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0); 180 while (PETSC_TRUE) { 181 if (PetscUnlikely(cur < l)) {cur = l; break;} 182 if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break; 183 last = cur; 184 cur -= (k <<= 1) + 1; 185 ++k; 186 } 187 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/ 188 while (last > cur + 1) { 189 mid = last - ((last - cur) >> 1); 190 if ((*cmp)(x, (arr)+mid*size, ctx) > 0) { 191 cur = mid; 192 } else { 193 last = mid; 194 } 195 } 196 *m = cur; 197 PetscFunctionReturn(0); 198 } 199 200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to 201 complete array, left is first index of left array, mid is first index of right array, right is last index of right 202 array */ 203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 204 { 205 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 206 const PetscInt llen = mid-left; 207 PetscErrorCode ierr; 208 209 PetscFunctionBegin; 210 Petsc_memcpy(tarr, arr+(left*size), llen*size); 211 while ((i < llen) && (j <= right)) { 212 if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) { 213 Petsc_memcpy(arr+(k*size), tarr+(i*size), size); 214 ++k; 215 gallopright = 0; 216 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 217 PetscInt l1, l2, diff1, diff2; 218 do { 219 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 220 /* search temp for right[j], can move up to that of temp into arr immediately */ 221 ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr); 222 diff1 = l1-i; 223 Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size); 224 k += diff1; 225 i = l1; 226 /* search right for temp[i], can move up to that many of right into arr */ 227 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr); 228 diff2 = l2-j; 229 Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size); 230 k += diff2; 231 j = l2; 232 if (i >= llen || j > right) break; 233 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 234 ++MIN_GALLOP_GLOBAL; 235 } 236 } else { 237 Petsc_memmove(arr+(k*size), arr+(j*size), size); 238 ++k; 239 gallopleft = 0; 240 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 241 PetscInt l1, l2, diff1, diff2; 242 do { 243 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 244 /* search right for temp[i], can move up to that many of right into arr */ 245 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr); 246 diff2 = l2-j; 247 Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size); 248 k += diff2; 249 j = l2; 250 /* search temp for right[j], can copy up to that of temp into arr immediately */ 251 ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr); 252 diff1 = l1-i; 253 Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size); 254 k += diff1; 255 i = l1; 256 if (i >= llen || j > right) break; 257 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 258 ++MIN_GALLOP_GLOBAL; 259 } 260 } 261 } 262 if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);} 263 PetscFunctionReturn(0); 264 } 265 266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 267 { 268 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0; 269 const PetscInt llen = mid-left; 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize); 274 while ((i < llen) && (j <= right)) { 275 if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) { 276 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize); 277 ++k; 278 gallopright = 0; 279 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) { 280 PetscInt l1, l2, diff1, diff2; 281 do { 282 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 283 /* search temp for right[j], can move up to that of temp into arr immediately */ 284 ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr); 285 diff1 = l1-i; 286 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize); 287 k += diff1; 288 i = l1; 289 /* search right for temp[i], can move up to that many of right into arr */ 290 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr); 291 diff2 = l2-j; 292 Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize); 293 k += diff2; 294 j = l2; 295 if (i >= llen || j > right) break; 296 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 297 ++MIN_GALLOP_GLOBAL; 298 } 299 } else { 300 Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize); 301 ++k; 302 gallopleft = 0; 303 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) { 304 PetscInt l1, l2, diff1, diff2; 305 do { 306 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 307 /* search right for temp[i], can move up to that many of right into arr */ 308 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr); 309 diff2 = l2-j; 310 Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize); 311 k += diff2; 312 j = l2; 313 /* search temp for right[j], can copy up to that of temp into arr immediately */ 314 ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr); 315 diff1 = l1-i; 316 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize); 317 k += diff1; 318 i = l1; 319 if (i >= llen || j > right) break; 320 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 321 ++MIN_GALLOP_GLOBAL; 322 } 323 } 324 } 325 if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);} 326 PetscFunctionReturn(0); 327 } 328 329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to 330 complete array, left is first index of left array, mid is first index of right array, right is last index of right 331 array */ 332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 333 { 334 PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0; 335 const PetscInt rlen = right-mid+1; 336 PetscErrorCode ierr; 337 338 PetscFunctionBegin; 339 Petsc_memcpy(tarr, (arr)+mid*size, rlen*size); 340 while ((i >= 0) && (j >= left)) { 341 if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) { 342 Petsc_memcpy((arr)+k*size, (tarr)+i*size, size); 343 --k; 344 gallopleft = 0; 345 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 346 PetscInt l1, l2, diff1, diff2; 347 do { 348 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 349 /* search temp for left[j], can copy up to that many of temp into arr */ 350 ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr); 351 diff1 = i-l1; 352 Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size); 353 k -= diff1; 354 i = l1; 355 /* search left for temp[i], can move up to that many of left up arr */ 356 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr); 357 diff2 = j-l2; 358 Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size); 359 k -= diff2; 360 j = l2; 361 if (i < 0 || j < left) break; 362 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 363 ++MIN_GALLOP_GLOBAL; 364 } 365 } else { 366 Petsc_memmove((arr)+k*size, (arr)+j*size, size); 367 --k; 368 gallopright = 0; 369 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 370 PetscInt l1, l2, diff1, diff2; 371 do { 372 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 373 /* search left for temp[i], can move up to that many of left up arr */ 374 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr); 375 diff2 = j-l2; 376 Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size); 377 k -= diff2; 378 j = l2; 379 /* search temp for left[j], can copy up to that many of temp into arr */ 380 ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr); 381 diff1 = i-l1; 382 Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size); 383 k -= diff1; 384 i = l1; 385 if (i < 0 || j < left) break; 386 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 387 ++MIN_GALLOP_GLOBAL; 388 } 389 } 390 } 391 if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);} 392 PetscFunctionReturn(0); 393 } 394 395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right) 396 { 397 PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0; 398 const PetscInt rlen = right-mid+1; 399 PetscErrorCode ierr; 400 401 PetscFunctionBegin; 402 Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize); 403 while ((i >= 0) && (j >= left)) { 404 if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) { 405 Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize); 406 --k; 407 gallopleft = 0; 408 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) { 409 PetscInt l1, l2, diff1, diff2; 410 do { 411 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 412 /* search temp for left[j], can copy up to that many of temp into arr */ 413 ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr); 414 diff1 = i-l1; 415 Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize); 416 k -= diff1; 417 i = l1; 418 /* search left for temp[i], can move up to that many of left up arr */ 419 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr); 420 diff2 = j-l2; 421 Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize); 422 k -= diff2; 423 j = l2; 424 if (i < 0 || j < left) break; 425 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 426 ++MIN_GALLOP_GLOBAL; 427 } 428 } else { 429 Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize); 430 --k; 431 gallopright = 0; 432 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) { 433 PetscInt l1, l2, diff1, diff2; 434 do { 435 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL; 436 /* search left for temp[i], can move up to that many of left up arr */ 437 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr); 438 diff2 = j-l2; 439 Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize); 440 k -= diff2; 441 j = l2; 442 /* search temp for left[j], can copy up to that many of temp into arr */ 443 ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr); 444 diff1 = i-l1; 445 Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize); 446 k -= diff1; 447 i = l1; 448 if (i < 0 || j < left) break; 449 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL); 450 ++MIN_GALLOP_GLOBAL; 451 } 452 } 453 } 454 if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);} 455 PetscFunctionReturn(0); 456 } 457 458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper 459 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */ 460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 461 { 462 PetscInt i = start == left ? start+1 : start; 463 464 PetscFunctionBegin; 465 for (; i <= right; ++i) { 466 PetscInt j = i-1; 467 Petsc_memcpy(tarr, arr+(i*size), size); 468 while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) { 469 COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size); 470 --j; 471 } 472 Petsc_memcpy((arr)+(j+1)*size, tarr, size); 473 } 474 PetscFunctionReturn(0); 475 } 476 477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 478 { 479 PetscInt i = start == left ? start+1 : start; 480 481 PetscFunctionBegin; 482 for (; i <= right; ++i) { 483 PetscInt j = i-1; 484 Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize); 485 while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) { 486 COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize); 487 --j; 488 } 489 Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize); 490 } 491 PetscFunctionReturn(0); 492 } 493 494 /* See PetscInsertionSort_Private */ 495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 496 { 497 PetscInt i = start == left ? start+1 : start; 498 499 PetscFunctionBegin; 500 for (; i <= right; ++i) { 501 PetscInt l = left, r = i; 502 Petsc_memcpy(tarr, arr+(i*size), size); 503 do { 504 const PetscInt m = l + ((r - l) >> 1); 505 if ((*cmp)(tarr, arr+(m*size), ctx) < 0) { 506 r = m; 507 } else { 508 l = m + 1; 509 } 510 } while (l < r); 511 Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size); 512 Petsc_memcpy(arr+(l*size), tarr, size); 513 } 514 PetscFunctionReturn(0); 515 } 516 517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right) 518 { 519 PetscInt i = start == left ? start+1 : start; 520 521 PetscFunctionBegin; 522 for (; i <= right; ++i) { 523 PetscInt l = left, r = i; 524 Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize); 525 do { 526 const PetscInt m = l + ((r - l) >> 1); 527 if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) { 528 r = m; 529 } else { 530 l = m + 1; 531 } 532 } while (l < r); 533 Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize); 534 Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize); 535 } 536 PetscFunctionReturn(0); 537 } 538 539 typedef struct { 540 PetscInt size; 541 PetscInt start; 542 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */ 543 } PetscTimSortStack; 544 #else 545 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt)); 546 #endif 547 548 typedef struct { 549 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN); 550 size_t size; 551 size_t maxsize; 552 } PetscTimSortBuffer; 553 554 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize) 555 { 556 PetscFunctionBegin; 557 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0); 558 { 559 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */ 560 PetscErrorCode ierr; 561 size_t newMax = PetscMin(newSize*newSize, buff->maxsize); 562 ierr = PetscFree(buff->ptr);CHKERRQ(ierr); 563 ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr); 564 buff->size = newMax; 565 } 566 PetscFunctionReturn(0); 567 } 568 569 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize) 570 { 571 PetscFunctionBegin; 572 for (;stacksize; --stacksize) { 573 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 574 if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) { 575 PetscInt l, m = stack[stacksize].start, r; 576 PetscErrorCode ierr; 577 /* Search A for B[0] insertion */ 578 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr); 579 /* Search B for A[-1] insertion */ 580 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr); 581 if (m-l <= r-m) { 582 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr); 583 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 584 } else { 585 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr); 586 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 587 } 588 } 589 /* Update A with merge */ 590 stack[stacksize-1].size += stack[stacksize].size; 591 } 592 PetscFunctionReturn(0); 593 } 594 595 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize) 596 { 597 PetscFunctionBegin; 598 for (;stacksize; --stacksize) { 599 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */ 600 if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) { 601 PetscInt l, m = stack[stacksize].start, r; 602 PetscErrorCode ierr; 603 /* Search A for B[0] insertion */ 604 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr); 605 /* Search B for A[-1] insertion */ 606 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr); 607 if (m-l <= r-m) { 608 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr); 609 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr); 610 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 611 } else { 612 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr); 613 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr); 614 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 615 } 616 } 617 /* Update A with merge */ 618 stack[stacksize-1].size += stack[stacksize].size; 619 } 620 PetscFunctionReturn(0); 621 } 622 623 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize) 624 { 625 PetscInt i = *stacksize; 626 PetscErrorCode ierr; 627 628 PetscFunctionBegin; 629 while (i) { 630 PetscInt l, m, r, itemp = i; 631 632 if (i == 1) { 633 /* A = stack[i-1], B = stack[i] */ 634 if (stack[i-1].size < stack[i].size) { 635 /* if A[-1] <= B[0] then sorted */ 636 if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) { 637 m = stack[i].start; 638 /* Search A for B[0] insertion */ 639 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr); 640 /* Search B for A[-1] insertion */ 641 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr); 642 if (m-l <= r-m) { 643 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr); 644 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 645 } else { 646 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr); 647 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 648 } 649 } 650 /* Update A with merge */ 651 stack[i-1].size += stack[i].size; 652 --i; 653 } 654 } else { 655 /* i > 2, i.e. C exists 656 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 657 if (stack[i-2].size <= stack[i-1].size+stack[i].size) { 658 if (stack[i-2].size < stack[i].size) { 659 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 660 if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) { 661 m = stack[i-1].start; 662 /* Search A for B[0] insertion */ 663 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr); 664 /* Search B for A[-1] insertion */ 665 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr); 666 if (m-l <= r-m) { 667 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr); 668 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 669 } else { 670 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr); 671 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 672 } 673 } 674 /* Update A with merge */ 675 stack[i-2].size += stack[i-1].size; 676 /* Push C up the stack */ 677 stack[i-1].start = stack[i].start; 678 stack[i-1].size = stack[i].size; 679 } else { 680 /* merge C into B */ 681 mergeBC: 682 /* If B[-1] <= C[0] then... you know the drill */ 683 if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) { 684 m = stack[i].start; 685 /* Search B for C[0] insertion */ 686 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr); 687 /* Search C for B[-1] insertion */ 688 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr); 689 if (m-l <= r-m) { 690 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr); 691 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 692 } else { 693 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr); 694 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr); 695 } 696 } 697 /* Update B with merge */ 698 stack[i-1].size += stack[i].size; 699 } 700 --i; 701 } else if (stack[i-1].size <= stack[i].size) { 702 /* merge C into B */ 703 goto mergeBC; 704 } 705 } 706 if (itemp == i) break; 707 } 708 *stacksize = i; 709 PetscFunctionReturn(0); 710 } 711 712 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize) 713 { 714 PetscInt i = *stacksize; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 while (i) { 719 PetscInt l, m, r, itemp = i; 720 721 if (i == 1) { 722 /* A = stack[i-1], B = stack[i] */ 723 if (stack[i-1].size < stack[i].size) { 724 /* if A[-1] <= B[0] then sorted */ 725 if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) { 726 m = stack[i].start; 727 /* Search A for B[0] insertion */ 728 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr); 729 /* Search B for A[-1] insertion */ 730 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr); 731 if (m-l <= r-m) { 732 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr); 733 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr); 734 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 735 } else { 736 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr); 737 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr); 738 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 739 } 740 } 741 /* Update A with merge */ 742 stack[i-1].size += stack[i].size; 743 --i; 744 } 745 } else { 746 /* i > 2, i.e. C exists 747 A = stack[i-2], B = stack[i-1], C = stack[i]; */ 748 if (stack[i-2].size <= stack[i-1].size+stack[i].size) { 749 if (stack[i-2].size < stack[i].size) { 750 /* merge B into A, but if A[-1] <= B[0] then already sorted */ 751 if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) { 752 m = stack[i-1].start; 753 /* Search A for B[0] insertion */ 754 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr); 755 /* Search B for A[-1] insertion */ 756 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr); 757 if (m-l <= r-m) { 758 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr); 759 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr); 760 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 761 } else { 762 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr); 763 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr); 764 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 765 } 766 } 767 /* Update A with merge */ 768 stack[i-2].size += stack[i-1].size; 769 /* Push C up the stack */ 770 stack[i-1].start = stack[i].start; 771 stack[i-1].size = stack[i].size; 772 } else { 773 /* merge C into B */ 774 mergeBC: 775 /* If B[-1] <= C[0] then... you know the drill */ 776 if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) { 777 m = stack[i].start; 778 /* Search B for C[0] insertion */ 779 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr); 780 /* Search C for B[-1] insertion */ 781 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr); 782 if (m-l <= r-m) { 783 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr); 784 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr); 785 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 786 } else { 787 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr); 788 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr); 789 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr); 790 } 791 } 792 /* Update B with merge */ 793 stack[i-1].size += stack[i].size; 794 } 795 --i; 796 } else if (stack[i-1].size <= stack[i].size) { 797 /* merge C into B */ 798 goto mergeBC; 799 } 800 } 801 if (itemp == i) break; 802 } 803 *stacksize = i; 804 PetscFunctionReturn(0); 805 } 806 807 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous 808 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either 809 binary insertion sort or regulat insertion sort */ 810 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 811 { 812 const PetscInt re = PetscMin(runstart+minrun, n-1); 813 PetscInt ri = runstart; 814 815 PetscFunctionBegin; 816 if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);} 817 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 818 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) { 819 ++ri; 820 while (ri < n-1) { 821 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break; 822 ++ri; 823 } 824 { 825 PetscInt lo = runstart, hi = ri; 826 for (; lo < hi; ++lo, --hi) { 827 COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size); 828 } 829 } 830 } else { 831 ++ri; 832 while (ri < n-1) { 833 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break; 834 ++ri; 835 } 836 } 837 #if defined(PETSC_USE_DEBUG) 838 { 839 PetscErrorCode ierr; 840 ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr); 841 } 842 #endif 843 if (ri < re) { 844 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 845 binary search */ 846 if (ri-runstart <= minrun >> 1) { 847 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 848 PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re); 849 } else { 850 PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re); 851 } 852 *runend = re; 853 } else *runend = ri; 854 PetscFunctionReturn(0); 855 } 856 857 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend) 858 { 859 const PetscInt re = PetscMin(runstart+minrun, n-1); 860 PetscInt ri = runstart; 861 862 PetscFunctionBegin; 863 if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);} 864 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */ 865 if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) { 866 ++ri; 867 while (ri < n-1) { 868 if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break; 869 ++ri; 870 } 871 { 872 PetscInt lo = runstart, hi = ri; 873 for (; lo < hi; ++lo, --hi) { 874 COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr); 875 } 876 } 877 } else { 878 ++ri; 879 while (ri < n-1) { 880 if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break; 881 ++ri; 882 } 883 } 884 #if defined(PETSC_USE_DEBUG) 885 { 886 PetscErrorCode ierr; 887 ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr); 888 } 889 #endif 890 if (ri < re) { 891 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try 892 binary search */ 893 if (ri-runstart <= minrun >> 1) { 894 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */ 895 PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re); 896 } else { 897 PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re); 898 } 899 *runend = re; 900 } else *runend = ri; 901 PetscFunctionReturn(0); 902 } 903 904 /*@C 905 PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm. 906 907 Not Collective 908 909 Input Parameters: 910 + n - number of values 911 . arr - array to be sorted 912 . size - size in bytes of the datatype held in arr 913 . cmp - function pointer to comparison function 914 - ctx - optional context to be passed to comparison function, NULL if not needed 915 916 Output Parameters: 917 . arr - sorted array 918 919 Notes: 920 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 921 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to 922 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To 923 do so it repeatedly triggers attempts throughout to merge adjacent runs. 924 925 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 926 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 927 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 928 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 929 likely all contain similar data. 930 931 Sample usage: 932 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference 933 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 934 may also 935 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if 936 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing 937 order 938 939 .vb 940 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 941 my_type l = *(my_type *) left, r = *(my_type *) right; 942 return (l < r) ? -1 : (l > r); 943 } 944 .ve 945 Note the context is unused here but you may use it to pass and subsequently access whatever information required 946 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 947 Then pass the function 948 .vb 949 PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx) 950 .ve 951 952 Fortran Notes: 953 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 954 returns result. For example 955 .vb 956 subroutine CompareIntegers(left,right,ctx,result) 957 implicit none 958 959 PetscInt,intent(in) :: left, right 960 type(UserCtx) :: ctx 961 integer,intent(out) :: result 962 963 if (left < right) then 964 result = -1 965 else if (left == right) then 966 result = 0 967 else 968 result = 1 969 end if 970 return 971 end subroutine CompareIntegers 972 .ve 973 974 References: 975 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt 976 977 Level: developer 978 979 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered() 980 @*/ 981 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx) 982 { 983 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 984 PetscTimSortStack runstack[128]; 985 PetscTimSortBuffer buff; 986 PetscErrorCode ierr; 987 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 988 It is so unlikely that this limit is reached that this is __never__ checked for */ 989 990 PetscFunctionBegin; 991 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 992 is a power of 2 or one plus a power of 2 */ 993 { 994 PetscInt t = n, r = 0; 995 /* r becomes 1 if the least significant bits contain at least one off bit */ 996 while (t >= 64) { 997 r |= t & 1; 998 t >>= 1; 999 } 1000 minrun = t + r; 1001 } 1002 if (PetscDefined(USE_DEBUG)) { 1003 ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr); 1004 if (n < 64) { 1005 ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr); 1006 } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun); 1007 } 1008 ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr); 1009 buff.size = (size_t) minrun*size; 1010 buff.maxsize = (size_t) n*size; 1011 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1012 while (runstart < n) { 1013 /* Check if additional entries are at least partially ordered and build natural run */ 1014 ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr); 1015 runstack[stacksize].start = runstart; 1016 runstack[stacksize].size = runend-runstart+1; 1017 ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr); 1018 ++stacksize; 1019 runstart = runend+1; 1020 } 1021 /* Have been inside while, so discard last stacksize++ */ 1022 --stacksize; 1023 ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr); 1024 ierr = PetscFree(buff.ptr);CHKERRQ(ierr); 1025 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 /*@C 1030 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and 1031 reorders a second array to match the first. The arrays need not be the same type. 1032 1033 Not Collective 1034 1035 Input Parameters: 1036 + n - number of values 1037 . asize - size in bytes of the datatype held in arr 1038 . bsize - size in bytes of the datatype held in barr 1039 . cmp - function pointer to comparison function 1040 - ctx - optional context to be passed to comparison function, NULL if not needed 1041 1042 Input/Output Parameters: 1043 + arr - array to be sorted, on output it is sorted 1044 - barr - array to be reordered, on output it is reordered 1045 1046 Notes: 1047 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT 1048 overlap. 1049 1050 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous 1051 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims 1052 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced 1053 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs. 1054 1055 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search 1056 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in 1057 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these 1058 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they 1059 likely all contain similar data. 1060 1061 Sample usage: 1062 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference 1063 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user 1064 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only 1065 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in 1066 increasing order 1067 1068 .vb 1069 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) { 1070 my_type l = *(my_type *) left, r = *(my_type *) right; 1071 return (l < r) ? -1 : (l > r); 1072 } 1073 .ve 1074 Note the context is unused here but you may use it to pass and subsequently access whatever information required 1075 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function. 1076 Then pass the function 1077 .vb 1078 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx) 1079 .ve 1080 1081 Fortran Notes: 1082 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and 1083 returns result. For example 1084 .vb 1085 subroutine CompareIntegers(left,right,ctx,result) 1086 implicit none 1087 1088 PetscInt,intent(in) :: left, right 1089 type(UserCtx) :: ctx 1090 integer,intent(out) :: result 1091 1092 if (left < right) then 1093 result = -1 1094 else if (left == right) then 1095 result = 0 1096 else 1097 result = 1 1098 end if 1099 return 1100 end subroutine CompareIntegers 1101 .ve 1102 1103 References: 1104 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt 1105 1106 Level: developer 1107 1108 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray() 1109 @*/ 1110 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx) 1111 { 1112 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0; 1113 PetscTimSortStack runstack[128]; 1114 PetscTimSortBuffer abuff, bbuff; 1115 PetscErrorCode ierr; 1116 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements. 1117 It is so unlikely that this limit is reached that this is __never__ checked for */ 1118 1119 PetscFunctionBegin; 1120 /* Compute minrun. Minrun should be (32, 65) such that N/minrun 1121 is a power of 2 or one plus a power of 2 */ 1122 { 1123 PetscInt t = n, r = 0; 1124 /* r becomes 1 if the least significant bits contain at least one off bit */ 1125 while (t >= 64) { 1126 r |= t & 1; 1127 t >>= 1; 1128 } 1129 minrun = t + r; 1130 } 1131 if (PetscDefined(USE_DEBUG)) { 1132 ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr); 1133 if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun); 1134 } 1135 ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr); 1136 abuff.size = (size_t) minrun*asize; 1137 abuff.maxsize = (size_t) n*asize; 1138 ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr); 1139 bbuff.size = (size_t) minrun*bsize; 1140 bbuff.maxsize = (size_t) n*bsize; 1141 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1142 while (runstart < n) { 1143 /* Check if additional entries are at least partially ordered and build natural run */ 1144 ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr); 1145 runstack[stacksize].start = runstart; 1146 runstack[stacksize].size = runend-runstart+1; 1147 ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr); 1148 ++stacksize; 1149 runstart = runend+1; 1150 } 1151 /* Have been inside while, so discard last stacksize++ */ 1152 --stacksize; 1153 ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr); 1154 ierr = PetscFree(abuff.ptr);CHKERRQ(ierr); 1155 ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr); 1156 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL; 1157 PetscFunctionReturn(0); 1158 } 1159 1160 /*@ 1161 PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order. 1162 1163 Not Collective 1164 1165 Input Parameters: 1166 + n - number of values 1167 - arr - array of integers 1168 1169 Output Parameters: 1170 . arr - sorted array of integers 1171 1172 Notes: 1173 If the array is less than 64 entries long PetscSortInt() is automatically used. 1174 1175 This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is 1176 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1177 recommended that the user benchmark their code to see which routine is fastest. 1178 1179 Level: intermediate 1180 1181 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation() 1182 @*/ 1183 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[]) 1184 { 1185 PetscErrorCode ierr; 1186 PetscFunctionBegin; 1187 if (n <= 1) PetscFunctionReturn(0); 1188 PetscValidIntPointer(arr,2); 1189 if (n < 64) { 1190 ierr = PetscSortInt(n, arr);CHKERRQ(ierr); 1191 } else { 1192 ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr); 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 /*@ 1198 PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second 1199 array to match the first. 1200 1201 Not Collective 1202 1203 Input Parameter: 1204 . n - number of values 1205 1206 Input/Output Parameters: 1207 + arr1 - array of integers to be sorted, modified on output 1208 - arr2 - array of integers to be reordered, modified on output 1209 1210 Notes: 1211 The arrays CANNOT overlap. 1212 1213 This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is 1214 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1215 recommended that the user benchmark their code to see which routine is fastest. 1216 1217 Level: intermediate 1218 1219 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation() 1220 @*/ 1221 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[]) 1222 { 1223 PetscErrorCode ierr; 1224 PetscFunctionBegin; 1225 if (n <= 1) PetscFunctionReturn(0); 1226 PetscValidIntPointer(arr1,2); 1227 PetscValidIntPointer(arr2,3); 1228 /* cannot export out to PetscIntSortWithArray here since it isn't stable */ 1229 ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 /*@ 1234 PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order. 1235 1236 Not Collective 1237 1238 Input Parameters: 1239 + n - number of values 1240 - arr - array of PetscMPIInts 1241 1242 Output Parameters: 1243 . arr - sorted array of integers 1244 1245 Notes: 1246 If the array is less than 64 entries long PetscSortMPIInt() is automatically used. 1247 1248 This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is 1249 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1250 recommended that the user benchmark their code to see which routine is fastest. 1251 1252 Level: intermediate 1253 1254 .seealso: PetscTimSort(), PetscSortMPIInt() 1255 @*/ 1256 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[]) 1257 { 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 if (n <= 1) PetscFunctionReturn(0); 1262 PetscValidIntPointer(arr,2); 1263 if (n < 64) { 1264 ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr); 1265 } else { 1266 ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr); 1267 } 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /*@ 1272 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second 1273 array to match the first. 1274 1275 Not Collective 1276 1277 Input Parameter: 1278 . n - number of values 1279 1280 Input/Output Parameters: 1281 . arr1 - array of integers to be sorted, modified on output 1282 - arr2 - array of integers to be reordered, modified on output 1283 1284 Notes: 1285 The arrays CANNOT overlap. 1286 1287 This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is 1288 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1289 recommended that the user benchmark their code to see which routine is fastest. 1290 1291 Level: intermediate 1292 1293 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation() 1294 @*/ 1295 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[]) 1296 { 1297 PetscErrorCode ierr; 1298 PetscFunctionBegin; 1299 if (n <= 1) PetscFunctionReturn(0); 1300 PetscValidIntPointer(arr1,2); 1301 PetscValidIntPointer(arr2,3); 1302 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */ 1303 ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /*@ 1308 PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order. 1309 1310 Not Collective 1311 1312 Input Parameters: 1313 + n - number of values 1314 - arr - array of PetscReals 1315 1316 Output Parameters: 1317 . arr - sorted array of integers 1318 1319 Notes: 1320 If the array is less than 64 entries long PetscSortReal() is automatically used. 1321 1322 This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is 1323 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1324 recommended that the user benchmark their code to see which routine is fastest. 1325 1326 Level: intermediate 1327 1328 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation() 1329 @*/ 1330 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[]) 1331 { 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 if (n <= 1) PetscFunctionReturn(0); 1336 PetscValidRealPointer(arr,2); 1337 if (n < 64) { 1338 ierr = PetscSortReal(n, arr);CHKERRQ(ierr); 1339 } else { 1340 ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr); 1341 } 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /*@ 1346 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second 1347 array of PetscInts to match the first. 1348 1349 Not Collective 1350 1351 Input Parameter: 1352 . n - number of values 1353 1354 Input/Output Parameters: 1355 . arr1 - array of PetscReals to be sorted, modified on output 1356 - arr2 - array of PetscReals to be reordered, modified on output 1357 1358 Notes: 1359 This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is 1360 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__ 1361 recommended that the user benchmark their code to see which routine is fastest. 1362 1363 Level: intermediate 1364 1365 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation() 1366 @*/ 1367 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[]) 1368 { 1369 PetscErrorCode ierr; 1370 PetscFunctionBegin; 1371 if (n <= 1) PetscFunctionReturn(0); 1372 PetscValidRealPointer(arr1,2); 1373 PetscValidIntPointer(arr2,3); 1374 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */ 1375 ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378