xref: /petsc/src/sys/utils/sortso.c (revision 2fa40bb9206b96114faa7cb222621ec184d31cd2)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
543 } PetscTimSortStack;
544 #else
545 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
546 #endif
547 
548 typedef struct {
549   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
550   size_t size;
551   size_t maxsize;
552 } PetscTimSortBuffer;
553 
554 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
555 {
556   PetscFunctionBegin;
557   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
558   {
559     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
560     PetscErrorCode ierr;
561     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
562     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
563     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
564     buff->size = newMax;
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
570 {
571   PetscFunctionBegin;
572   for (;stacksize; --stacksize) {
573     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
574     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
575       PetscInt       l, m = stack[stacksize].start, r;
576       PetscErrorCode ierr;
577       /* Search A for B[0] insertion */
578       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
579       /* Search B for A[-1] insertion */
580       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
581       if (m-l <= r-m) {
582         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
583         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
584       } else {
585         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
586         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
587       }
588     }
589     /* Update A with merge */
590     stack[stacksize-1].size += stack[stacksize].size;
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596 {
597   PetscFunctionBegin;
598   for (;stacksize; --stacksize) {
599     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
601       PetscInt       l, m = stack[stacksize].start, r;
602       PetscErrorCode ierr;
603       /* Search A for B[0] insertion */
604       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
605       /* Search B for A[-1] insertion */
606       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
607       if (m-l <= r-m) {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       } else {
612         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
613         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
614         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
615       }
616     }
617     /* Update A with merge */
618     stack[stacksize-1].size += stack[stacksize].size;
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
624 {
625   PetscInt       i = *stacksize;
626   PetscErrorCode ierr;
627 
628   PetscFunctionBegin;
629   while (i) {
630     PetscInt l, m, r, itemp = i;
631 
632     if (i == 1) {
633       /* A = stack[i-1], B = stack[i] */
634       if (stack[i-1].size < stack[i].size) {
635         /* if A[-1] <= B[0] then sorted */
636         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
637           m = stack[i].start;
638           /* Search A for B[0] insertion */
639           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
640           /* Search B for A[-1] insertion */
641           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
642           if (m-l <= r-m) {
643             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
644             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
645           } else {
646             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
647             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
648           }
649         }
650         /* Update A with merge */
651         stack[i-1].size += stack[i].size;
652         --i;
653       }
654     } else {
655       /* i > 2, i.e. C exists
656        A = stack[i-2], B = stack[i-1], C = stack[i]; */
657       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
658         if (stack[i-2].size < stack[i].size) {
659           /* merge B into A, but if A[-1] <= B[0] then already sorted */
660           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
661             m = stack[i-1].start;
662             /* Search A for B[0] insertion */
663             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
664             /* Search B for A[-1] insertion */
665             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
666             if (m-l <= r-m) {
667               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
668               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
669             } else {
670               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
671               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
672             }
673           }
674           /* Update A with merge */
675           stack[i-2].size += stack[i-1].size;
676           /* Push C up the stack */
677           stack[i-1].start = stack[i].start;
678           stack[i-1].size = stack[i].size;
679         } else {
680           /* merge C into B */
681           mergeBC:
682           /* If B[-1] <= C[0] then... you know the drill */
683           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
684             m = stack[i].start;
685             /* Search B for C[0] insertion */
686             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
687             /* Search C for B[-1] insertion */
688             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
689             if (m-l <= r-m) {
690               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
691               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
692             } else {
693               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
694               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
695             }
696           }
697           /* Update B with merge */
698           stack[i-1].size += stack[i].size;
699         }
700         --i;
701       } else if (stack[i-1].size <= stack[i].size) {
702         /* merge C into B */
703         goto mergeBC;
704       }
705     }
706     if (itemp == i) break;
707   }
708   *stacksize = i;
709   PetscFunctionReturn(0);
710 }
711 
712 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
713 {
714   PetscInt       i = *stacksize;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   while (i) {
719     PetscInt l, m, r, itemp = i;
720 
721     if (i == 1) {
722       /* A = stack[i-1], B = stack[i] */
723       if (stack[i-1].size < stack[i].size) {
724         /* if A[-1] <= B[0] then sorted */
725         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
726           m = stack[i].start;
727           /* Search A for B[0] insertion */
728           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
729           /* Search B for A[-1] insertion */
730           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
731           if (m-l <= r-m) {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           } else {
736             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
737             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
738             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
739           }
740         }
741         /* Update A with merge */
742         stack[i-1].size += stack[i].size;
743         --i;
744       }
745     } else {
746       /* i > 2, i.e. C exists
747        A = stack[i-2], B = stack[i-1], C = stack[i]; */
748       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
749         if (stack[i-2].size < stack[i].size) {
750           /* merge B into A, but if A[-1] <= B[0] then already sorted */
751           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
752             m = stack[i-1].start;
753             /* Search A for B[0] insertion */
754             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
755             /* Search B for A[-1] insertion */
756             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
757             if (m-l <= r-m) {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             } else {
762               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
763               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
764               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
765             }
766           }
767           /* Update A with merge */
768           stack[i-2].size += stack[i-1].size;
769           /* Push C up the stack */
770           stack[i-1].start = stack[i].start;
771           stack[i-1].size = stack[i].size;
772         } else {
773           /* merge C into B */
774           mergeBC:
775           /* If B[-1] <= C[0] then... you know the drill */
776           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
777             m = stack[i].start;
778             /* Search B for C[0] insertion */
779             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
780             /* Search C for B[-1] insertion */
781             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
782             if (m-l <= r-m) {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             } else {
787               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
788               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
789               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
790             }
791           }
792           /* Update B with merge */
793           stack[i-1].size += stack[i].size;
794         }
795         --i;
796       } else if (stack[i-1].size <= stack[i].size) {
797         /* merge C into B */
798         goto mergeBC;
799       }
800     }
801     if (itemp == i) break;
802   }
803   *stacksize = i;
804   PetscFunctionReturn(0);
805 }
806 
807 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
808  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
809  binary insertion sort or regulat insertion sort */
810 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
811 {
812   const PetscInt re = PetscMin(runstart+minrun, n-1);
813   PetscInt       ri = runstart;
814 
815   PetscFunctionBegin;
816   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
817   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
819     ++ri;
820     while (ri < n-1) {
821       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
822       ++ri;
823     }
824     {
825       PetscInt lo = runstart, hi = ri;
826       for (; lo < hi; ++lo, --hi) {
827         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
828       }
829     }
830   } else {
831     ++ri;
832     while (ri < n-1) {
833       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
834       ++ri;
835     }
836   }
837 #if defined(PETSC_USE_DEBUG)
838   {
839     PetscErrorCode ierr;
840     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
841   }
842 #endif
843   if (ri < re) {
844     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
845      binary search */
846     if (ri-runstart <= minrun >> 1) {
847       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
848       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
849     } else {
850       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
851     }
852     *runend = re;
853   } else *runend = ri;
854   PetscFunctionReturn(0);
855 }
856 
857 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
858 {
859   const PetscInt re = PetscMin(runstart+minrun, n-1);
860   PetscInt       ri = runstart;
861 
862   PetscFunctionBegin;
863   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
864   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
865   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
866     ++ri;
867     while (ri < n-1) {
868       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
869       ++ri;
870     }
871     {
872       PetscInt lo = runstart, hi = ri;
873       for (; lo < hi; ++lo, --hi) {
874         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
875       }
876     }
877   } else {
878     ++ri;
879     while (ri < n-1) {
880       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
881       ++ri;
882     }
883   }
884 #if defined(PETSC_USE_DEBUG)
885   {
886     PetscErrorCode ierr;
887     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
888   }
889 #endif
890   if (ri < re) {
891     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
892      binary search */
893     if (ri-runstart <= minrun >> 1) {
894       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
895       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
896     } else {
897       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
898     }
899     *runend = re;
900   } else *runend = ri;
901   PetscFunctionReturn(0);
902 }
903 
904 /*@C
905   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
906 
907   Not Collective
908 
909   Input Parameters:
910 + n    - number of values
911 . arr  - array to be sorted
912 . size - size in bytes of the datatype held in arr
913 . cmp  - function pointer to comparison function
914 - ctx  - optional context to be passed to comparison function, NULL if not needed
915 
916   Output Parameters:
917 . arr  - sorted array
918 
919   Notes:
920   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
921  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
922  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
923  do so it repeatedly triggers attempts throughout to merge adjacent runs.
924 
925   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
926   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
927   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
928   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
929   likely all contain similar data.
930 
931   Sample usage:
932   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
933   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
934   may also
935  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
936  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
937   order
938 
939 .vb
940   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
941     my_type l = *(my_type *) left, r = *(my_type *) right;
942     return (l < r) ? -1 : (l > r);
943   }
944 .ve
945   Note the context is unused here but you may use it to pass and subsequently access whatever information required
946   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
947   Then pass the function
948 .vb
949   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
950 .ve
951 
952   Fortran Notes:
953   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
954   returns result. For example
955 .vb
956  subroutine CompareIntegers(left,right,ctx,result)
957    implicit none
958 
959    PetscInt,intent(in) :: left, right
960    type(UserCtx)       :: ctx
961    integer,intent(out) :: result
962 
963    if (left < right) then
964      result = -1
965    else if (left == right) then
966      result = 0
967    else
968      result = 1
969    end if
970    return
971  end subroutine CompareIntegers
972 .ve
973 
974   References:
975   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
976 
977   Level: developer
978 
979 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
980 @*/
981 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
982 {
983   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
984   PetscTimSortStack  runstack[128];
985   PetscTimSortBuffer buff;
986   PetscErrorCode     ierr;
987   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
988    It is so unlikely that this limit is reached that this is __never__ checked for */
989 
990   PetscFunctionBegin;
991   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
992    is a power of 2 or one plus a power of 2 */
993   {
994     PetscInt t = n, r = 0;
995     /* r becomes 1 if the least significant bits contain at least one off bit */
996     while (t >= 64) {
997       r |= t & 1;
998       t >>= 1;
999     }
1000     minrun = t + r;
1001   }
1002   if (PetscDefined(USE_DEBUG)) {
1003     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1004     if (n < 64) {
1005       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1006     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1007   }
1008   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1009   buff.size = (size_t) minrun*size;
1010   buff.maxsize = (size_t) n*size;
1011   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1012   while (runstart < n) {
1013     /* Check if additional entries are at least partially ordered and build natural run */
1014     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1015     runstack[stacksize].start = runstart;
1016     runstack[stacksize].size = runend-runstart+1;
1017     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1018     ++stacksize;
1019     runstart = runend+1;
1020   }
1021   /* Have been inside while, so discard last stacksize++ */
1022   --stacksize;
1023   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1024   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1025   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /*@C
1030   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1031   reorders a second array to match the first. The arrays need not be the same type.
1032 
1033   Not Collective
1034 
1035   Input Parameters:
1036 + n     - number of values
1037 . asize - size in bytes of the datatype held in arr
1038 . bsize - size in bytes of the datatype held in barr
1039 . cmp   - function pointer to comparison function
1040 - ctx   - optional context to be passed to comparison function, NULL if not needed
1041 
1042   Input/Output Parameters:
1043 + arr  - array to be sorted, on output it is sorted
1044 - barr - array to be reordered, on output it is reordered
1045 
1046   Notes:
1047   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1048   overlap.
1049 
1050   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1051   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1052  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1053  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1054 
1055   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1056   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1057   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1058   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1059   likely all contain similar data.
1060 
1061   Sample usage:
1062   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1063   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1064   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1065   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1066   increasing order
1067 
1068 .vb
1069   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1070     my_type l = *(my_type *) left, r = *(my_type *) right;
1071     return (l < r) ? -1 : (l > r);
1072   }
1073 .ve
1074   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1075   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1076   Then pass the function
1077 .vb
1078   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1079 .ve
1080 
1081   Fortran Notes:
1082   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1083   returns result. For example
1084 .vb
1085  subroutine CompareIntegers(left,right,ctx,result)
1086    implicit none
1087 
1088    PetscInt,intent(in) :: left, right
1089    type(UserCtx)       :: ctx
1090    integer,intent(out) :: result
1091 
1092    if (left < right) then
1093      result = -1
1094    else if (left == right) then
1095      result = 0
1096    else
1097      result = 1
1098    end if
1099    return
1100  end subroutine CompareIntegers
1101 .ve
1102 
1103   References:
1104   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1105 
1106   Level: developer
1107 
1108 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1109 @*/
1110 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1111 {
1112   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1113   PetscTimSortStack  runstack[128];
1114   PetscTimSortBuffer abuff, bbuff;
1115   PetscErrorCode     ierr;
1116   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1117    It is so unlikely that this limit is reached that this is __never__ checked for */
1118 
1119   PetscFunctionBegin;
1120   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1121    is a power of 2 or one plus a power of 2 */
1122   {
1123     PetscInt t = n, r = 0;
1124     /* r becomes 1 if the least significant bits contain at least one off bit */
1125     while (t >= 64) {
1126       r |= t & 1;
1127       t >>= 1;
1128     }
1129     minrun = t + r;
1130   }
1131   if (PetscDefined(USE_DEBUG)) {
1132     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1133     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1134   }
1135   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1136   abuff.size = (size_t) minrun*asize;
1137   abuff.maxsize = (size_t) n*asize;
1138   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1139   bbuff.size = (size_t) minrun*bsize;
1140   bbuff.maxsize = (size_t) n*bsize;
1141   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1142   while (runstart < n) {
1143     /* Check if additional entries are at least partially ordered and build natural run */
1144     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1145     runstack[stacksize].start = runstart;
1146     runstack[stacksize].size = runend-runstart+1;
1147     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1148     ++stacksize;
1149     runstart = runend+1;
1150   }
1151   /* Have been inside while, so discard last stacksize++ */
1152   --stacksize;
1153   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1154   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1155   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1156   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 /*@
1161    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1162 
1163    Not Collective
1164 
1165    Input Parameters:
1166 +  n   - number of values
1167 -  arr - array of integers
1168 
1169    Output Parameters:
1170 .  arr - sorted array of integers
1171 
1172    Notes:
1173    If the array is less than 64 entries long PetscSortInt() is automatically used.
1174 
1175    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1176    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1177    recommended that the user benchmark their code to see which routine is fastest.
1178 
1179    Level: intermediate
1180 
1181 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1182 @*/
1183 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1184 {
1185   PetscErrorCode ierr;
1186   PetscFunctionBegin;
1187   if (n <= 1) PetscFunctionReturn(0);
1188   PetscValidIntPointer(arr,2);
1189   if (n < 64) {
1190     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1191   } else {
1192     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 /*@
1198    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1199    array to match the first.
1200 
1201    Not Collective
1202 
1203    Input Parameter:
1204 .  n   - number of values
1205 
1206    Input/Output Parameters:
1207 +  arr1 - array of integers to be sorted, modified on output
1208 -  arr2 - array of integers to be reordered, modified on output
1209 
1210    Notes:
1211    The arrays CANNOT overlap.
1212 
1213    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1214    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1215    recommended that the user benchmark their code to see which routine is fastest.
1216 
1217    Level: intermediate
1218 
1219 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1220 @*/
1221 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1222 {
1223   PetscErrorCode ierr;
1224   PetscFunctionBegin;
1225   if (n <= 1) PetscFunctionReturn(0);
1226   PetscValidIntPointer(arr1,2);
1227   PetscValidIntPointer(arr2,3);
1228   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1229   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 /*@
1234    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1235 
1236    Not Collective
1237 
1238    Input Parameters:
1239 +  n   - number of values
1240 -  arr - array of PetscMPIInts
1241 
1242    Output Parameters:
1243 .  arr - sorted array of integers
1244 
1245    Notes:
1246    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1247 
1248    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1249    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1250    recommended that the user benchmark their code to see which routine is fastest.
1251 
1252    Level: intermediate
1253 
1254 .seealso: PetscTimSort(), PetscSortMPIInt()
1255 @*/
1256 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1257 {
1258   PetscErrorCode  ierr;
1259 
1260   PetscFunctionBegin;
1261   if (n <= 1) PetscFunctionReturn(0);
1262   PetscValidIntPointer(arr,2);
1263   if (n < 64) {
1264     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 /*@
1272    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1273    array to match the first.
1274 
1275    Not Collective
1276 
1277    Input Parameter:
1278 .  n   - number of values
1279 
1280    Input/Output Parameters:
1281 .  arr1 - array of integers to be sorted, modified on output
1282 -  arr2 - array of integers to be reordered, modified on output
1283 
1284    Notes:
1285    The arrays CANNOT overlap.
1286 
1287    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1288    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1289    recommended that the user benchmark their code to see which routine is fastest.
1290 
1291    Level: intermediate
1292 
1293 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1294 @*/
1295 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1296 {
1297   PetscErrorCode ierr;
1298   PetscFunctionBegin;
1299   if (n <= 1) PetscFunctionReturn(0);
1300   PetscValidIntPointer(arr1,2);
1301   PetscValidIntPointer(arr2,3);
1302   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1303   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 /*@
1308    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1309 
1310    Not Collective
1311 
1312    Input Parameters:
1313 +  n   - number of values
1314 -  arr - array of PetscReals
1315 
1316    Output Parameters:
1317 .  arr - sorted array of integers
1318 
1319    Notes:
1320    If the array is less than 64 entries long PetscSortReal() is automatically used.
1321 
1322    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1323    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1324    recommended that the user benchmark their code to see which routine is fastest.
1325 
1326    Level: intermediate
1327 
1328 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1329 @*/
1330 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1331 {
1332   PetscErrorCode  ierr;
1333 
1334   PetscFunctionBegin;
1335   if (n <= 1) PetscFunctionReturn(0);
1336   PetscValidRealPointer(arr,2);
1337   if (n < 64) {
1338     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1339   } else {
1340     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1341   }
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 /*@
1346    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1347    array of PetscInts to match the first.
1348 
1349    Not Collective
1350 
1351    Input Parameter:
1352 .  n   - number of values
1353 
1354    Input/Output Parameters:
1355 .  arr1 - array of PetscReals to be sorted, modified on output
1356 -  arr2 - array of PetscReals to be reordered, modified on output
1357 
1358    Notes:
1359    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1360    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1361    recommended that the user benchmark their code to see which routine is fastest.
1362 
1363    Level: intermediate
1364 
1365 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1366 @*/
1367 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1368 {
1369   PetscErrorCode ierr;
1370   PetscFunctionBegin;
1371   if (n <= 1) PetscFunctionReturn(0);
1372   PetscValidRealPointer(arr1,2);
1373   PetscValidIntPointer(arr2,3);
1374   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1375   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378