xref: /petsc/src/sys/utils/sortso.c (revision 97bb3fdc57998093333768643284f5be71d00324)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
543 
544 typedef struct {
545   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546   size_t size;
547   size_t maxsize;
548 } PetscTimSortBuffer;
549 
550 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551 {
552   PetscFunctionBegin;
553   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
554   {
555     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
556     PetscErrorCode ierr;
557     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
558     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
559     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
560     buff->size = newMax;
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566 {
567   PetscFunctionBegin;
568   for (;stacksize; --stacksize) {
569     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571       PetscInt       l, m = stack[stacksize].start, r;
572       PetscErrorCode ierr;
573       /* Search A for B[0] insertion */
574       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
575       /* Search B for A[-1] insertion */
576       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
577       if (m-l <= r-m) {
578         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
579         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
580       } else {
581         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
582         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
583       }
584     }
585     /* Update A with merge */
586     stack[stacksize-1].size += stack[stacksize].size;
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592 {
593   PetscFunctionBegin;
594   for (;stacksize; --stacksize) {
595     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597       PetscInt       l, m = stack[stacksize].start, r;
598       PetscErrorCode ierr;
599       /* Search A for B[0] insertion */
600       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
601       /* Search B for A[-1] insertion */
602       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
603       if (m-l <= r-m) {
604         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
605         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
606         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
607       } else {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       }
612     }
613     /* Update A with merge */
614     stack[stacksize-1].size += stack[stacksize].size;
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620 {
621   PetscInt       i = *stacksize;
622   PetscErrorCode ierr;
623 
624   PetscFunctionBegin;
625   while (i) {
626     PetscInt l, m, r, itemp = i;
627 
628     if (i == 1) {
629       /* A = stack[i-1], B = stack[i] */
630       if (stack[i-1].size < stack[i].size) {
631         /* if A[-1] <= B[0] then sorted */
632         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633           m = stack[i].start;
634           /* Search A for B[0] insertion */
635           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
636           /* Search B for A[-1] insertion */
637           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
638           if (m-l <= r-m) {
639             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
640             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
641           } else {
642             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
643             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
644           }
645         }
646         /* Update A with merge */
647         stack[i-1].size += stack[i].size;
648         --i;
649       }
650     } else {
651       /* i > 2, i.e. C exists
652        A = stack[i-2], B = stack[i-1], C = stack[i]; */
653       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654         if (stack[i-2].size < stack[i].size) {
655           /* merge B into A, but if A[-1] <= B[0] then already sorted */
656           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657             m = stack[i-1].start;
658             /* Search A for B[0] insertion */
659             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
660             /* Search B for A[-1] insertion */
661             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
662             if (m-l <= r-m) {
663               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
664               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
665             } else {
666               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
667               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
668             }
669           }
670           /* Update A with merge */
671           stack[i-2].size += stack[i-1].size;
672           /* Push C up the stack */
673           stack[i-1].start = stack[i].start;
674           stack[i-1].size = stack[i].size;
675         } else {
676           /* merge C into B */
677           mergeBC:
678           /* If B[-1] <= C[0] then... you know the drill */
679           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680             m = stack[i].start;
681             /* Search B for C[0] insertion */
682             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
683             /* Search C for B[-1] insertion */
684             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
685             if (m-l <= r-m) {
686               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
687               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
688             } else {
689               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
690               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
691             }
692           }
693           /* Update B with merge */
694           stack[i-1].size += stack[i].size;
695         }
696         --i;
697       } else if (stack[i-1].size <= stack[i].size) {
698         /* merge C into B */
699         goto mergeBC;
700       }
701     }
702     if (itemp == i) break;
703   }
704   *stacksize = i;
705   PetscFunctionReturn(0);
706 }
707 
708 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709 {
710   PetscInt       i = *stacksize;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714   while (i) {
715     PetscInt l, m, r, itemp = i;
716 
717     if (i == 1) {
718       /* A = stack[i-1], B = stack[i] */
719       if (stack[i-1].size < stack[i].size) {
720         /* if A[-1] <= B[0] then sorted */
721         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722           m = stack[i].start;
723           /* Search A for B[0] insertion */
724           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
725           /* Search B for A[-1] insertion */
726           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
727           if (m-l <= r-m) {
728             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
729             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
730             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
731           } else {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           }
736         }
737         /* Update A with merge */
738         stack[i-1].size += stack[i].size;
739         --i;
740       }
741     } else {
742       /* i > 2, i.e. C exists
743        A = stack[i-2], B = stack[i-1], C = stack[i]; */
744       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745         if (stack[i-2].size < stack[i].size) {
746           /* merge B into A, but if A[-1] <= B[0] then already sorted */
747           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748             m = stack[i-1].start;
749             /* Search A for B[0] insertion */
750             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
751             /* Search B for A[-1] insertion */
752             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
753             if (m-l <= r-m) {
754               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
755               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
756               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
757             } else {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             }
762           }
763           /* Update A with merge */
764           stack[i-2].size += stack[i-1].size;
765           /* Push C up the stack */
766           stack[i-1].start = stack[i].start;
767           stack[i-1].size = stack[i].size;
768         } else {
769           /* merge C into B */
770           mergeBC:
771           /* If B[-1] <= C[0] then... you know the drill */
772           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773             m = stack[i].start;
774             /* Search B for C[0] insertion */
775             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
776             /* Search C for B[-1] insertion */
777             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
778             if (m-l <= r-m) {
779               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
780               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
781               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
782             } else {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             }
787           }
788           /* Update B with merge */
789           stack[i-1].size += stack[i].size;
790         }
791         --i;
792       } else if (stack[i-1].size <= stack[i].size) {
793         /* merge C into B */
794         goto mergeBC;
795       }
796     }
797     if (itemp == i) break;
798   }
799   *stacksize = i;
800   PetscFunctionReturn(0);
801 }
802 
803 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805  binary insertion sort or regulat insertion sort */
806 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807 {
808   const PetscInt re = PetscMin(runstart+minrun, n-1);
809   PetscInt       ri = runstart;
810 
811   PetscFunctionBegin;
812   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
813   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815     ++ri;
816     while (ri < n-1) {
817       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818       ++ri;
819     }
820     {
821       PetscInt lo = runstart, hi = ri;
822       for (; lo < hi; ++lo, --hi) {
823         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824       }
825     }
826   } else {
827     ++ri;
828     while (ri < n-1) {
829       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830       ++ri;
831     }
832   }
833 #if defined(PETSC_USE_DEBUG)
834   {
835     PetscErrorCode ierr;
836     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
837   }
838 #endif
839   if (ri < re) {
840     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841      binary search */
842     if (ri-runstart <= minrun >> 1) {
843       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845     } else {
846       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847     }
848     *runend = re;
849   } else *runend = ri;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854 {
855   const PetscInt re = PetscMin(runstart+minrun, n-1);
856   PetscInt       ri = runstart;
857 
858   PetscFunctionBegin;
859   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
860   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862     ++ri;
863     while (ri < n-1) {
864       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865       ++ri;
866     }
867     {
868       PetscInt lo = runstart, hi = ri;
869       for (; lo < hi; ++lo, --hi) {
870         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871       }
872     }
873   } else {
874     ++ri;
875     while (ri < n-1) {
876       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877       ++ri;
878     }
879   }
880 #if defined(PETSC_USE_DEBUG)
881   {
882     PetscErrorCode ierr;
883     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
884   }
885 #endif
886   if (ri < re) {
887     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888      binary search */
889     if (ri-runstart <= minrun >> 1) {
890       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892     } else {
893       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894     }
895     *runend = re;
896   } else *runend = ri;
897   PetscFunctionReturn(0);
898 }
899 
900 /*@C
901   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
902 
903   Not Collective
904 
905   Input Parameters:
906 + n    - number of values
907 . arr  - array to be sorted
908 . size - size in bytes of the datatype held in arr
909 . cmp  - function pointer to comparison function
910 - ctx  - optional context to be passed to comparison function, NULL if not needed
911 
912   Output Parameters:
913 . arr  - sorted array
914 
915   Notes:
916   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919  do so it repeatedly triggers attempts throughout to merge adjacent runs.
920 
921   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925   likely all contain similar data.
926 
927   Sample usage:
928   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930   may also
931  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933   order
934 
935 .vb
936   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937     my_type l = *(my_type *) left, r = *(my_type *) right;
938     return (l < r) ? -1 : (l > r);
939   }
940 .ve
941   Note the context is unused here but you may use it to pass and subsequently access whatever information required
942   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943   Then pass the function
944 .vb
945   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946 .ve
947 
948   Fortran Notes:
949   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950   returns result. For example
951 .vb
952  subroutine CompareIntegers(left,right,ctx,result)
953    implicit none
954 
955    PetscInt,intent(in) :: left, right
956    type(UserCtx)       :: ctx
957    integer,intent(out) :: result
958 
959    if (left < right) then
960      result = -1
961    else if (left == right) then
962      result = 0
963    else
964      result = 1
965    end if
966    return
967  end subroutine CompareIntegers
968 .ve
969 
970   References:
971   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
972 
973   Level: developer
974 
975 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976 @*/
977 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978 {
979   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
980   PetscTimSortStack  runstack[128];
981   PetscTimSortBuffer buff;
982   PetscErrorCode     ierr;
983   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984    It is so unlikely that this limit is reached that this is __never__ checked for */
985 
986   PetscFunctionBegin;
987   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988    is a power of 2 or one plus a power of 2 */
989   {
990     PetscInt t = n, r = 0;
991     /* r becomes 1 if the least significant bits contain at least one off bit */
992     while (t >= 64) {
993       r |= t & 1;
994       t >>= 1;
995     }
996     minrun = t + r;
997   }
998   if (PetscDefined(USE_DEBUG)) {
999     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1000     if (n < 64) {
1001       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1002     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003   }
1004   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1005   buff.size = (size_t) minrun*size;
1006   buff.maxsize = (size_t) n*size;
1007   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008   while (runstart < n) {
1009     /* Check if additional entries are at least partially ordered and build natural run */
1010     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1011     runstack[stacksize].start = runstart;
1012     runstack[stacksize].size = runend-runstart+1;
1013     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1014     ++stacksize;
1015     runstart = runend+1;
1016   }
1017   /* Have been inside while, so discard last stacksize++ */
1018   --stacksize;
1019   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1020   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1021   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 /*@C
1026   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027   reorders a second array to match the first. The arrays need not be the same type.
1028 
1029   Not Collective
1030 
1031   Input Parameters:
1032 + n     - number of values
1033 . asize - size in bytes of the datatype held in arr
1034 . bsize - size in bytes of the datatype held in barr
1035 . cmp   - function pointer to comparison function
1036 - ctx   - optional context to be passed to comparison function, NULL if not needed
1037 
1038   Input/Output Parameters:
1039 + arr  - array to be sorted, on output it is sorted
1040 - barr - array to be reordered, on output it is reordered
1041 
1042   Notes:
1043   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1044   overlap.
1045 
1046   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1047   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1048  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1049  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1050 
1051   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1052   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1053   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1054   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1055   likely all contain similar data.
1056 
1057   Sample usage:
1058   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1059   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1060   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1061   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1062   increasing order
1063 
1064 .vb
1065   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1066     my_type l = *(my_type *) left, r = *(my_type *) right;
1067     return (l < r) ? -1 : (l > r);
1068   }
1069 .ve
1070   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1071   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1072   Then pass the function
1073 .vb
1074   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1075 .ve
1076 
1077   Fortran Notes:
1078   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1079   returns result. For example
1080 .vb
1081  subroutine CompareIntegers(left,right,ctx,result)
1082    implicit none
1083 
1084    PetscInt,intent(in) :: left, right
1085    type(UserCtx)       :: ctx
1086    integer,intent(out) :: result
1087 
1088    if (left < right) then
1089      result = -1
1090    else if (left == right) then
1091      result = 0
1092    else
1093      result = 1
1094    end if
1095    return
1096  end subroutine CompareIntegers
1097 .ve
1098 
1099   References:
1100   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1101 
1102   Level: developer
1103 
1104 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1105 @*/
1106 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1107 {
1108   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1109   PetscTimSortStack  runstack[128];
1110   PetscTimSortBuffer abuff, bbuff;
1111   PetscErrorCode     ierr;
1112   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1113    It is so unlikely that this limit is reached that this is __never__ checked for */
1114 
1115   PetscFunctionBegin;
1116   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1117    is a power of 2 or one plus a power of 2 */
1118   {
1119     PetscInt t = n, r = 0;
1120     /* r becomes 1 if the least significant bits contain at least one off bit */
1121     while (t >= 64) {
1122       r |= t & 1;
1123       t >>= 1;
1124     }
1125     minrun = t + r;
1126   }
1127   if (PetscDefined(USE_DEBUG)) {
1128     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1129     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1130   }
1131   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1132   abuff.size = (size_t) minrun*asize;
1133   abuff.maxsize = (size_t) n*asize;
1134   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1135   bbuff.size = (size_t) minrun*bsize;
1136   bbuff.maxsize = (size_t) n*bsize;
1137   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1138   while (runstart < n) {
1139     /* Check if additional entries are at least partially ordered and build natural run */
1140     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1141     runstack[stacksize].start = runstart;
1142     runstack[stacksize].size = runend-runstart+1;
1143     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1144     ++stacksize;
1145     runstart = runend+1;
1146   }
1147   /* Have been inside while, so discard last stacksize++ */
1148   --stacksize;
1149   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1150   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1151   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1152   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 /*@
1157    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1158 
1159    Not Collective
1160 
1161    Input Parameters:
1162 +  n   - number of values
1163 -  arr - array of integers
1164 
1165    Output Parameters:
1166 .  arr - sorted array of integers
1167 
1168    Notes:
1169    If the array is less than 64 entries long PetscSortInt() is automatically used.
1170 
1171    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1172    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1173    recommended that the user benchmark their code to see which routine is fastest.
1174 
1175    Level: intermediate
1176 
1177 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1178 @*/
1179 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1180 {
1181   PetscErrorCode ierr;
1182   PetscFunctionBegin;
1183   if (n <= 1) PetscFunctionReturn(0);
1184   PetscValidIntPointer(arr,2);
1185   if (n < 64) {
1186     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1187   } else {
1188     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1189   }
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 /*@
1194    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1195    array to match the first.
1196 
1197    Not Collective
1198 
1199    Input Parameter:
1200 .  n   - number of values
1201 
1202    Input/Output Parameters:
1203 +  arr1 - array of integers to be sorted, modified on output
1204 -  arr2 - array of integers to be reordered, modified on output
1205 
1206    Notes:
1207    The arrays CANNOT overlap.
1208 
1209    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1210    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1211    recommended that the user benchmark their code to see which routine is fastest.
1212 
1213    Level: intermediate
1214 
1215 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1216 @*/
1217 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1218 {
1219   PetscErrorCode ierr;
1220   PetscFunctionBegin;
1221   if (n <= 1) PetscFunctionReturn(0);
1222   PetscValidIntPointer(arr1,2);
1223   PetscValidIntPointer(arr2,3);
1224   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1225   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*@
1230    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1231 
1232    Not Collective
1233 
1234    Input Parameters:
1235 +  n   - number of values
1236 -  arr - array of PetscMPIInts
1237 
1238    Output Parameters:
1239 .  arr - sorted array of integers
1240 
1241    Notes:
1242    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1243 
1244    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1245    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1246    recommended that the user benchmark their code to see which routine is fastest.
1247 
1248    Level: intermediate
1249 
1250 .seealso: PetscTimSort(), PetscSortMPIInt()
1251 @*/
1252 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1253 {
1254   PetscErrorCode  ierr;
1255 
1256   PetscFunctionBegin;
1257   if (n <= 1) PetscFunctionReturn(0);
1258   PetscValidIntPointer(arr,2);
1259   if (n < 64) {
1260     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1261   } else {
1262     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /*@
1268    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1269    array to match the first.
1270 
1271    Not Collective
1272 
1273    Input Parameter:
1274 .  n   - number of values
1275 
1276    Input/Output Parameters:
1277 .  arr1 - array of integers to be sorted, modified on output
1278 -  arr2 - array of integers to be reordered, modified on output
1279 
1280    Notes:
1281    The arrays CANNOT overlap.
1282 
1283    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1284    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1285    recommended that the user benchmark their code to see which routine is fastest.
1286 
1287    Level: intermediate
1288 
1289 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1290 @*/
1291 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1292 {
1293   PetscErrorCode ierr;
1294   PetscFunctionBegin;
1295   if (n <= 1) PetscFunctionReturn(0);
1296   PetscValidIntPointer(arr1,2);
1297   PetscValidIntPointer(arr2,3);
1298   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1299   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 /*@
1304    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1305 
1306    Not Collective
1307 
1308    Input Parameters:
1309 +  n   - number of values
1310 -  arr - array of PetscReals
1311 
1312    Output Parameters:
1313 .  arr - sorted array of integers
1314 
1315    Notes:
1316    If the array is less than 64 entries long PetscSortReal() is automatically used.
1317 
1318    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1319    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1320    recommended that the user benchmark their code to see which routine is fastest.
1321 
1322    Level: intermediate
1323 
1324 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1325 @*/
1326 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1327 {
1328   PetscErrorCode  ierr;
1329 
1330   PetscFunctionBegin;
1331   if (n <= 1) PetscFunctionReturn(0);
1332   PetscValidRealPointer(arr,2);
1333   if (n < 64) {
1334     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1335   } else {
1336     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1337   }
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /*@
1342    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1343    array of PetscInts to match the first.
1344 
1345    Not Collective
1346 
1347    Input Parameter:
1348 .  n   - number of values
1349 
1350    Input/Output Parameters:
1351 .  arr1 - array of PetscReals to be sorted, modified on output
1352 -  arr2 - array of PetscReals to be reordered, modified on output
1353 
1354    Notes:
1355    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1356    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1357    recommended that the user benchmark their code to see which routine is fastest.
1358 
1359    Level: intermediate
1360 
1361 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1362 @*/
1363 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1364 {
1365   PetscErrorCode ierr;
1366   PetscFunctionBegin;
1367   if (n <= 1) PetscFunctionReturn(0);
1368   PetscValidRealPointer(arr1,2);
1369   PetscValidIntPointer(arr2,3);
1370   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1371   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374