xref: /petsc/src/sys/utils/sortso.c (revision a2fddd78f770fa4fc19a8af67e65be331f27d92b)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
543 
544 typedef struct {
545   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546   size_t size;
547   size_t maxsize;
548 } PetscTimSortBuffer;
549 
550 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551 {
552   PetscFunctionBegin;
553   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
554   {
555     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
556     PetscErrorCode ierr;
557     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
558     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
559     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
560     buff->size = newMax;
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566 {
567   PetscFunctionBegin;
568   for (;stacksize; --stacksize) {
569     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571       PetscInt       l, m = stack[stacksize].start, r;
572       PetscErrorCode ierr;
573       /* Search A for B[0] insertion */
574       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
575       /* Search B for A[-1] insertion */
576       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
577       if (m-l <= r-m) {
578         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
579         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
580       } else {
581         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
582         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
583       }
584     }
585     /* Update A with merge */
586     stack[stacksize-1].size += stack[stacksize].size;
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592 {
593   PetscFunctionBegin;
594   for (;stacksize; --stacksize) {
595     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597       PetscInt       l, m = stack[stacksize].start, r;
598       PetscErrorCode ierr;
599       /* Search A for B[0] insertion */
600       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
601       /* Search B for A[-1] insertion */
602       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
603       if (m-l <= r-m) {
604         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
605         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
606         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
607       } else {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       }
612     }
613     /* Update A with merge */
614     stack[stacksize-1].size += stack[stacksize].size;
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620 {
621   PetscInt       i = *stacksize;
622   PetscErrorCode ierr;
623 
624   PetscFunctionBegin;
625   while (i) {
626     PetscInt l, m, r, itemp = i;
627 
628     if (i == 1) {
629       /* A = stack[i-1], B = stack[i] */
630       if (stack[i-1].size < stack[i].size) {
631         /* if A[-1] <= B[0] then sorted */
632         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633           m = stack[i].start;
634           /* Search A for B[0] insertion */
635           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
636           /* Search B for A[-1] insertion */
637           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
638           if (m-l <= r-m) {
639             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
640             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
641           } else {
642             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
643             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
644           }
645         }
646         /* Update A with merge */
647         stack[i-1].size += stack[i].size;
648         --i;
649       }
650     } else {
651       /* i > 2, i.e. C exists
652        A = stack[i-2], B = stack[i-1], C = stack[i]; */
653       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654         if (stack[i-2].size < stack[i].size) {
655           /* merge B into A, but if A[-1] <= B[0] then already sorted */
656           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657             m = stack[i-1].start;
658             /* Search A for B[0] insertion */
659             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
660             /* Search B for A[-1] insertion */
661             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
662             if (m-l <= r-m) {
663               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
664               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
665             } else {
666               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
667               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
668             }
669           }
670           /* Update A with merge */
671           stack[i-2].size += stack[i-1].size;
672           /* Push C up the stack */
673           stack[i-1].start = stack[i].start;
674           stack[i-1].size = stack[i].size;
675         } else {
676           /* merge C into B */
677           mergeBC:
678           /* If B[-1] <= C[0] then... you know the drill */
679           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680             m = stack[i].start;
681             /* Search B for C[0] insertion */
682             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
683             /* Search C for B[-1] insertion */
684             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
685             if (m-l <= r-m) {
686               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
687               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
688             } else {
689               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
690               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
691             }
692           }
693           /* Update B with merge */
694           stack[i-1].size += stack[i].size;
695         }
696         --i;
697       } else if (stack[i-1].size <= stack[i].size) {
698         /* merge C into B */
699         goto mergeBC;
700       }
701     }
702     if (itemp == i) break;
703   }
704   *stacksize = i;
705   PetscFunctionReturn(0);
706 }
707 
708 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709 {
710   PetscInt       i = *stacksize;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714   while (i) {
715     PetscInt l, m, r, itemp = i;
716 
717     if (i == 1) {
718       /* A = stack[i-1], B = stack[i] */
719       if (stack[i-1].size < stack[i].size) {
720         /* if A[-1] <= B[0] then sorted */
721         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722           m = stack[i].start;
723           /* Search A for B[0] insertion */
724           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
725           /* Search B for A[-1] insertion */
726           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
727           if (m-l <= r-m) {
728             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
729             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
730             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
731           } else {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           }
736         }
737         /* Update A with merge */
738         stack[i-1].size += stack[i].size;
739         --i;
740       }
741     } else {
742       /* i > 2, i.e. C exists
743        A = stack[i-2], B = stack[i-1], C = stack[i]; */
744       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745         if (stack[i-2].size < stack[i].size) {
746           /* merge B into A, but if A[-1] <= B[0] then already sorted */
747           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748             m = stack[i-1].start;
749             /* Search A for B[0] insertion */
750             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
751             /* Search B for A[-1] insertion */
752             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
753             if (m-l <= r-m) {
754               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
755               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
756               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
757             } else {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             }
762           }
763           /* Update A with merge */
764           stack[i-2].size += stack[i-1].size;
765           /* Push C up the stack */
766           stack[i-1].start = stack[i].start;
767           stack[i-1].size = stack[i].size;
768         } else {
769           /* merge C into B */
770           mergeBC:
771           /* If B[-1] <= C[0] then... you know the drill */
772           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773             m = stack[i].start;
774             /* Search B for C[0] insertion */
775             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
776             /* Search C for B[-1] insertion */
777             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
778             if (m-l <= r-m) {
779               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
780               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
781               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
782             } else {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             }
787           }
788           /* Update B with merge */
789           stack[i-1].size += stack[i].size;
790         }
791         --i;
792       } else if (stack[i-1].size <= stack[i].size) {
793         /* merge C into B */
794         goto mergeBC;
795       }
796     }
797     if (itemp == i) break;
798   }
799   *stacksize = i;
800   PetscFunctionReturn(0);
801 }
802 
803 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805  binary insertion sort or regulat insertion sort */
806 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807 {
808   const PetscInt re = PetscMin(runstart+minrun, n-1);
809   PetscInt       ri = runstart;
810 
811   PetscFunctionBegin;
812   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
813   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815     ++ri;
816     while (ri < n-1) {
817       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818       ++ri;
819     }
820     {
821       PetscInt lo = runstart, hi = ri;
822       for (; lo < hi; ++lo, --hi) {
823         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824       }
825     }
826   } else {
827     ++ri;
828     while (ri < n-1) {
829       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830       ++ri;
831     }
832   }
833 #if defined(PETSC_USE_DEBUG)
834   {
835     PetscErrorCode ierr;
836     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
837   }
838 #endif
839   if (ri < re) {
840     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841      binary search */
842     if (ri-runstart <= minrun >> 1) {
843       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845     } else {
846       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847     }
848     *runend = re;
849   } else *runend = ri;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854 {
855   const PetscInt re = PetscMin(runstart+minrun, n-1);
856   PetscInt       ri = runstart;
857 
858   PetscFunctionBegin;
859   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
860   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862     ++ri;
863     while (ri < n-1) {
864       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865       ++ri;
866     }
867     {
868       PetscInt lo = runstart, hi = ri;
869       for (; lo < hi; ++lo, --hi) {
870         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871       }
872     }
873   } else {
874     ++ri;
875     while (ri < n-1) {
876       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877       ++ri;
878     }
879   }
880 #if defined(PETSC_USE_DEBUG)
881   {
882     PetscErrorCode ierr;
883     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
884   }
885 #endif
886   if (ri < re) {
887     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888      binary search */
889     if (ri-runstart <= minrun >> 1) {
890       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892     } else {
893       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894     }
895     *runend = re;
896   } else *runend = ri;
897   PetscFunctionReturn(0);
898 }
899 
900 /*@C
901   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
902 
903   Not Collective
904 
905   Input Parameters:
906 + n    - number of values
907 . arr  - array to be sorted
908 . size - size in bytes of the datatype held in arr
909 . cmp  - function pointer to comparison function
910 - ctx  - optional context to be passed to comparison function, NULL if not needed
911 
912   Output Parameters:
913 . arr  - sorted array
914 
915   Notes:
916   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919  do so it repeatedly triggers attempts throughout to merge adjacent runs.
920 
921   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925   likely all contain similar data.
926 
927   Sample usage:
928   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930   may also
931  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933   order
934 
935 .vb
936   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937     my_type l = *(my_type *) left, r = *(my_type *) right;
938     return (l < r) ? -1 : (l > r);
939   }
940 .ve
941   Note the context is unused here but you may use it to pass and subsequently access whatever information required
942   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943   Then pass the function
944 .vb
945   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946 .ve
947 
948   Fortran Notes:
949   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950   returns result. For example
951 .vb
952  subroutine CompareIntegers(left,right,ctx,result)
953    implicit none
954 
955    PetscInt,intent(in) :: left, right
956    type(UserCtx)       :: ctx
957    integer,intent(out) :: result
958 
959    if (left < right) then
960      result = -1
961    else if (left == right) then
962      result = 0
963    else
964      result = 1
965    end if
966    return
967  end subroutine CompareIntegers
968 .ve
969 
970   References:
971   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
972 
973   Level: developer
974 
975 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976 @*/
977 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978 {
979   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
980   PetscTimSortStack  runstack[128];
981   PetscTimSortBuffer buff;
982   PetscErrorCode     ierr;
983   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984    It is so unlikely that this limit is reached that this is __never__ checked for */
985 
986   PetscFunctionBegin;
987   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988    is a power of 2 or one plus a power of 2 */
989   {
990     PetscInt t = n, r = 0;
991     /* r becomes 1 if the least significant bits contain at least one off bit */
992     while (t >= 64) {
993       r |= t & 1;
994       t >>= 1;
995     }
996     minrun = t + r;
997   }
998   if (PetscDefined(USE_DEBUG)) {
999     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1000     if (n < 64) {
1001       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1002     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003   }
1004   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1005   buff.size = (size_t) minrun*size;
1006   buff.maxsize = (size_t) n*size;
1007   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008   while (runstart < n) {
1009     /* Check if additional entries are at least partially ordered and build natural run */
1010     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1011     runstack[stacksize].start = runstart;
1012     runstack[stacksize].size = runend-runstart+1;
1013     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1014     ++stacksize;
1015     runstart = runend+1;
1016   }
1017   /* Have been inside while, so discard last stacksize++ */
1018   --stacksize;
1019   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1020   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1021   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 /*@C
1026   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027   reorders a second array to match the first. The arrays need not be the same type.
1028 
1029   Not Collective
1030 
1031   Input Parameters:
1032 + n     - number of values
1033 . arr   - array to be sorted
1034 . asize - size in bytes of the datatype held in arr
1035 . barr  - array to be reordered
1036 . asize - size in bytes of the datatype held in barr
1037 . cmp   - function pointer to comparison function
1038 - ctx   - optional context to be passed to comparison function, NULL if not needed
1039 
1040   Output Parameters:
1041 + arr  - sorted array
1042 - barr - reordered array
1043 
1044   Notes:
1045   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046   overlap.
1047 
1048   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1052 
1053   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057   likely all contain similar data.
1058 
1059   Sample usage:
1060   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064   increasing order
1065 
1066 .vb
1067   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068     my_type l = *(my_type *) left, r = *(my_type *) right;
1069     return (l < r) ? -1 : (l > r);
1070   }
1071 .ve
1072   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074   Then pass the function
1075 .vb
1076   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077 .ve
1078 
1079   Fortran Notes:
1080   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1081   returns result. For example
1082 .vb
1083  subroutine CompareIntegers(left,right,ctx,result)
1084    implicit none
1085 
1086    PetscInt,intent(in) :: left, right
1087    type(UserCtx)       :: ctx
1088    integer,intent(out) :: result
1089 
1090    if (left < right) then
1091      result = -1
1092    else if (left == right) then
1093      result = 0
1094    else
1095      result = 1
1096    end if
1097    return
1098  end subroutine CompareIntegers
1099 .ve
1100 
1101   References:
1102   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1103 
1104   Level: developer
1105 
1106 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1107 @*/
1108 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1109 {
1110   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1111   PetscTimSortStack  runstack[128];
1112   PetscTimSortBuffer abuff, bbuff;
1113   PetscErrorCode     ierr;
1114   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1115    It is so unlikely that this limit is reached that this is __never__ checked for */
1116 
1117   PetscFunctionBegin;
1118   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1119    is a power of 2 or one plus a power of 2 */
1120   {
1121     PetscInt t = n, r = 0;
1122     /* r becomes 1 if the least significant bits contain at least one off bit */
1123     while (t >= 64) {
1124       r |= t & 1;
1125       t >>= 1;
1126     }
1127     minrun = t + r;
1128   }
1129   if (PetscDefined(USE_DEBUG)) {
1130     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1131     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1132   }
1133   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1134   abuff.size = (size_t) minrun*asize;
1135   abuff.maxsize = (size_t) n*asize;
1136   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1137   bbuff.size = (size_t) minrun*bsize;
1138   bbuff.maxsize = (size_t) n*bsize;
1139   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1140   while (runstart < n) {
1141     /* Check if additional entries are at least partially ordered and build natural run */
1142     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1143     runstack[stacksize].start = runstart;
1144     runstack[stacksize].size = runend-runstart+1;
1145     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1146     ++stacksize;
1147     runstart = runend+1;
1148   }
1149   /* Have been inside while, so discard last stacksize++ */
1150   --stacksize;
1151   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1152   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1153   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1154   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 /*@
1159    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1160 
1161    Not Collective
1162 
1163    Input Parameters:
1164 +  n   - number of values
1165 -  arr - array of integers
1166 
1167    Output Parameters:
1168 .  arr - sorted array of integers
1169 
1170    Notes:
1171    If the array is less than 64 entries long PetscSortInt() is automatically used.
1172 
1173    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1174    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1175    recomended that the user benchmark their code to see which routine is fastest.
1176 
1177    Level: intermediate
1178 
1179 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1180 @*/
1181 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1182 {
1183   PetscErrorCode ierr;
1184   PetscFunctionBegin;
1185   if (n <= 1) PetscFunctionReturn(0);
1186   PetscValidIntPointer(arr,2);
1187   if (n < 64) {
1188     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 /*@
1196    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1197    array to match the first.
1198 
1199    Not Collective
1200 
1201    Input Parameters:
1202 +  n   - number of values
1203 .  arr1 - array of integers to be sorted
1204 -  arr2 - array of integers to be reordered
1205 
1206    Output Parameters:
1207 +  arr1 - sorted array of integers
1208 -  arr2 - reordered array of integers
1209 
1210    Notes:
1211    The arrays CANNOT overlap.
1212 
1213    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1214    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1215    recomended that the user benchmark their code to see which routine is fastest.
1216 
1217    Level: intermediate
1218 
1219 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1220 @*/
1221 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1222 {
1223   PetscErrorCode ierr;
1224   PetscFunctionBegin;
1225   if (n <= 1) PetscFunctionReturn(0);
1226   PetscValidIntPointer(arr1,2);
1227   PetscValidIntPointer(arr2,3);
1228   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1229   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 /*@
1234    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1235 
1236    Not Collective
1237 
1238    Input Parameters:
1239 +  n   - number of values
1240 -  arr - array of PetscMPIInts
1241 
1242    Output Parameters:
1243 .  arr - sorted array of integers
1244 
1245    Notes:
1246    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1247 
1248    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1249    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1250    recomended that the user benchmark their code to see which routine is fastest.
1251 
1252    Level: intermediate
1253 
1254 .seealso: PetscTimSort(), PetscSortMPIInt()
1255 @*/
1256 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1257 {
1258   PetscErrorCode  ierr;
1259 
1260   PetscFunctionBegin;
1261   if (n <= 1) PetscFunctionReturn(0);
1262   PetscValidIntPointer(arr,2);
1263   if (n < 64) {
1264     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 /*@
1272    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1273    array to match the first.
1274 
1275    Not Collective
1276 
1277    Input Parameters:
1278 +  n   - number of values
1279 .  arr1 - array of integers to be sorted
1280 -  arr2 - array of integers to be reordered
1281 
1282    Output Parameters:
1283 +  arr1 - sorted array of integers
1284 -  arr2 - reordered array of integers
1285 
1286    Notes:
1287    The arrays CANNOT overlap.
1288 
1289    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1290    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1291    recomended that the user benchmark their code to see which routine is fastest.
1292 
1293    Level: intermediate
1294 
1295 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1296 @*/
1297 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1298 {
1299   PetscErrorCode ierr;
1300   PetscFunctionBegin;
1301   if (n <= 1) PetscFunctionReturn(0);
1302   PetscValidIntPointer(arr1,2);
1303   PetscValidIntPointer(arr2,3);
1304   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1305   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /*@
1310    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1311 
1312    Not Collective
1313 
1314    Input Parameters:
1315 +  n   - number of values
1316 -  arr - array of PetscReals
1317 
1318    Output Parameters:
1319 .  arr - sorted array of integers
1320 
1321    Notes:
1322    If the array is less than 64 entries long PetscSortReal() is automatically used.
1323 
1324    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1325    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1326    recomended that the user benchmark their code to see which routine is fastest.
1327 
1328    Level: intermediate
1329 
1330 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1331 @*/
1332 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1333 {
1334   PetscErrorCode  ierr;
1335 
1336   PetscFunctionBegin;
1337   if (n <= 1) PetscFunctionReturn(0);
1338   PetscValidRealPointer(arr,2);
1339   if (n < 64) {
1340     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1343   }
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 /*@
1348    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1349    array of PetscInts to match the first.
1350 
1351    Not Collective
1352 
1353    Input Parameters:
1354 +  n   - number of values
1355 .  arr1 - array of PetscReals to be sorted
1356 -  arr2 - array of PetscReals to be reordered
1357 
1358    Output Parameters:
1359 +  arr1 - sorted array of PetscReals
1360 -  arr2 - reordered array of PetscInts
1361 
1362    Notes:
1363    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1364    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1365    recomended that the user benchmark their code to see which routine is fastest.
1366 
1367    Level: intermediate
1368 
1369 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1370 @*/
1371 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1372 {
1373   PetscErrorCode ierr;
1374   PetscFunctionBegin;
1375   if (n <= 1) PetscFunctionReturn(0);
1376   PetscValidRealPointer(arr1,2);
1377   PetscValidIntPointer(arr2,3);
1378   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1379   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382