xref: /petsc/src/sys/utils/sortso.c (revision 89d292be1450a79b784144c42fa3a9ce1a901ebe)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
543 } PetscTimSortStack;
544 #else
545 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
546 #endif
547 
548 typedef struct {
549   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
550   size_t size;
551   size_t maxsize;
552 } PetscTimSortBuffer;
553 
554 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
555 {
556   PetscFunctionBegin;
557   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
558   {
559     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
560     PetscErrorCode ierr;
561     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
562     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
563     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
564     buff->size = newMax;
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
570 {
571   PetscFunctionBegin;
572   for (;stacksize; --stacksize) {
573     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
574     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
575       PetscInt       l, m = stack[stacksize].start, r;
576       PetscErrorCode ierr;
577       /* Search A for B[0] insertion */
578       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
579       /* Search B for A[-1] insertion */
580       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
581       if (m-l <= r-m) {
582         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
583         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
584       } else {
585         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
586         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
587       }
588     }
589     /* Update A with merge */
590     stack[stacksize-1].size += stack[stacksize].size;
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596 {
597   PetscFunctionBegin;
598   for (;stacksize; --stacksize) {
599     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
601       PetscInt       l, m = stack[stacksize].start, r;
602       PetscErrorCode ierr;
603       /* Search A for B[0] insertion */
604       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
605       /* Search B for A[-1] insertion */
606       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
607       if (m-l <= r-m) {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       } else {
612         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
613         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
614         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
615       }
616     }
617     /* Update A with merge */
618     stack[stacksize-1].size += stack[stacksize].size;
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
624 {
625   PetscInt       i = *stacksize;
626   PetscErrorCode ierr;
627 
628   PetscFunctionBegin;
629   while (i) {
630     PetscInt l, m, r, itemp = i;
631 
632     if (i == 1) {
633       /* A = stack[i-1], B = stack[i] */
634       if (stack[i-1].size < stack[i].size) {
635         /* if A[-1] <= B[0] then sorted */
636         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
637           m = stack[i].start;
638           /* Search A for B[0] insertion */
639           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
640           /* Search B for A[-1] insertion */
641           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
642           if (m-l <= r-m) {
643             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
644             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
645           } else {
646             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
647             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
648           }
649         }
650         /* Update A with merge */
651         stack[i-1].size += stack[i].size;
652         --i;
653       }
654     } else {
655       /* i > 2, i.e. C exists
656        A = stack[i-2], B = stack[i-1], C = stack[i]; */
657       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
658         if (stack[i-2].size < stack[i].size) {
659           /* merge B into A, but if A[-1] <= B[0] then already sorted */
660           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
661             m = stack[i-1].start;
662             /* Search A for B[0] insertion */
663             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
664             /* Search B for A[-1] insertion */
665             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
666             if (m-l <= r-m) {
667               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
668               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
669             } else {
670               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
671               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
672             }
673           }
674           /* Update A with merge */
675           stack[i-2].size += stack[i-1].size;
676           /* Push C up the stack */
677           stack[i-1].start = stack[i].start;
678           stack[i-1].size = stack[i].size;
679         } else {
680           /* merge C into B */
681           mergeBC:
682           /* If B[-1] <= C[0] then... you know the drill */
683           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
684             m = stack[i].start;
685             /* Search B for C[0] insertion */
686             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
687             /* Search C for B[-1] insertion */
688             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
689             if (m-l <= r-m) {
690               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
691               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
692             } else {
693               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
694               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
695             }
696           }
697           /* Update B with merge */
698           stack[i-1].size += stack[i].size;
699         }
700         --i;
701       } else if (stack[i-1].size <= stack[i].size) {
702         /* merge C into B */
703         goto mergeBC;
704       }
705     }
706     if (itemp == i) break;
707   }
708   *stacksize = i;
709   PetscFunctionReturn(0);
710 }
711 
712 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
713 {
714   PetscInt       i = *stacksize;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   while (i) {
719     PetscInt l, m, r, itemp = i;
720 
721     if (i == 1) {
722       /* A = stack[i-1], B = stack[i] */
723       if (stack[i-1].size < stack[i].size) {
724         /* if A[-1] <= B[0] then sorted */
725         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
726           m = stack[i].start;
727           /* Search A for B[0] insertion */
728           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
729           /* Search B for A[-1] insertion */
730           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
731           if (m-l <= r-m) {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           } else {
736             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
737             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
738             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
739           }
740         }
741         /* Update A with merge */
742         stack[i-1].size += stack[i].size;
743         --i;
744       }
745     } else {
746       /* i > 2, i.e. C exists
747        A = stack[i-2], B = stack[i-1], C = stack[i]; */
748       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
749         if (stack[i-2].size < stack[i].size) {
750           /* merge B into A, but if A[-1] <= B[0] then already sorted */
751           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
752             m = stack[i-1].start;
753             /* Search A for B[0] insertion */
754             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
755             /* Search B for A[-1] insertion */
756             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
757             if (m-l <= r-m) {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             } else {
762               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
763               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
764               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
765             }
766           }
767           /* Update A with merge */
768           stack[i-2].size += stack[i-1].size;
769           /* Push C up the stack */
770           stack[i-1].start = stack[i].start;
771           stack[i-1].size = stack[i].size;
772         } else {
773           /* merge C into B */
774           mergeBC:
775           /* If B[-1] <= C[0] then... you know the drill */
776           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
777             m = stack[i].start;
778             /* Search B for C[0] insertion */
779             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
780             /* Search C for B[-1] insertion */
781             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
782             if (m-l <= r-m) {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             } else {
787               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
788               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
789               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
790             }
791           }
792           /* Update B with merge */
793           stack[i-1].size += stack[i].size;
794         }
795         --i;
796       } else if (stack[i-1].size <= stack[i].size) {
797         /* merge C into B */
798         goto mergeBC;
799       }
800     }
801     if (itemp == i) break;
802   }
803   *stacksize = i;
804   PetscFunctionReturn(0);
805 }
806 
807 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
808  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
809  binary insertion sort or regulat insertion sort */
810 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
811 {
812   const PetscInt re = PetscMin(runstart+minrun, n-1);
813   PetscInt       ri = runstart;
814 
815   PetscFunctionBegin;
816   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
817   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
819     ++ri;
820     while (ri < n-1) {
821       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
822       ++ri;
823     }
824     {
825       PetscInt lo = runstart, hi = ri;
826       for (; lo < hi; ++lo, --hi) {
827         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
828       }
829     }
830   } else {
831     ++ri;
832     while (ri < n-1) {
833       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
834       ++ri;
835     }
836   }
837 #if defined(PETSC_USE_DEBUG)
838   {
839     PetscErrorCode ierr;
840     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
841   }
842 #endif
843   if (ri < re) {
844     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
845      binary search */
846     if (ri-runstart <= minrun >> 1) {
847       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
848       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
849     } else {
850       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
851     }
852     *runend = re;
853   } else *runend = ri;
854   PetscFunctionReturn(0);
855 }
856 
857 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
858 {
859   const PetscInt re = PetscMin(runstart+minrun, n-1);
860   PetscInt       ri = runstart;
861 
862   PetscFunctionBegin;
863   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
864   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
865   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
866     ++ri;
867     while (ri < n-1) {
868       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
869       ++ri;
870     }
871     {
872       PetscInt lo = runstart, hi = ri;
873       for (; lo < hi; ++lo, --hi) {
874         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
875       }
876     }
877   } else {
878     ++ri;
879     while (ri < n-1) {
880       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
881       ++ri;
882     }
883   }
884 #if defined(PETSC_USE_DEBUG)
885   {
886     PetscErrorCode ierr;
887     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
888   }
889 #endif
890   if (ri < re) {
891     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
892      binary search */
893     if (ri-runstart <= minrun >> 1) {
894       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
895       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
896     } else {
897       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
898     }
899     *runend = re;
900   } else *runend = ri;
901   PetscFunctionReturn(0);
902 }
903 
904 /*@C
905   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
906 
907   Not Collective
908 
909   Input Parameters:
910 + n    - number of values
911 . arr  - array to be sorted
912 . size - size in bytes of the datatype held in arr
913 . cmp  - function pointer to comparison function
914 - ctx  - optional context to be passed to comparison function, NULL if not needed
915 
916   Output Parameters:
917 . arr  - sorted array
918 
919   Notes:
920   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
921  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
922  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
923  do so it repeatedly triggers attempts throughout to merge adjacent runs.
924 
925   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
926   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
927   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
928   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
929   likely all contain similar data.
930 
931   Sample usage:
932   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
933   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
934   may also
935  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
936  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
937   order
938 
939 .vb
940   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
941     my_type l = *(my_type *) left, r = *(my_type *) right;
942     return (l < r) ? -1 : (l > r);
943   }
944 .ve
945   Note the context is unused here but you may use it to pass and subsequently access whatever information required
946   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
947   Then pass the function
948 .vb
949   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
950 .ve
951 
952   Fortran Notes:
953   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
954   returns result. For example
955 .vb
956  subroutine CompareIntegers(left,right,ctx,result)
957    implicit none
958 
959    PetscInt,intent(in) :: left, right
960    type(UserCtx)       :: ctx
961    integer,intent(out) :: result
962 
963    if (left < right) then
964      result = -1
965    else if (left == right) then
966      result = 0
967    else
968      result = 1
969    end if
970    return
971  end subroutine CompareIntegers
972 .ve
973 
974   References:
975   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
976 
977   Level: developer
978 
979 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
980 @*/
981 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
982 {
983   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
984   PetscTimSortStack  runstack[128];
985   PetscTimSortBuffer buff;
986   PetscErrorCode     ierr;
987   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
988    It is so unlikely that this limit is reached that this is __never__ checked for */
989 
990   PetscFunctionBegin;
991   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
992    is a power of 2 or one plus a power of 2 */
993   {
994     PetscInt t = n, r = 0;
995     /* r becomes 1 if the least significant bits contain at least one off bit */
996     while (t >= 64) {
997       r |= t & 1;
998       t >>= 1;
999     }
1000     minrun = t + r;
1001   }
1002   if (PetscDefined(USE_DEBUG)) {
1003     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1004     if (n < 64) {
1005       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1006     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1007   }
1008   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1009   buff.size = (size_t) minrun*size;
1010   buff.maxsize = (size_t) n*size;
1011   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1012   while (runstart < n) {
1013     /* Check if additional entries are at least partially ordered and build natural run */
1014     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1015     runstack[stacksize].start = runstart;
1016     runstack[stacksize].size = runend-runstart+1;
1017     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1018     ++stacksize;
1019     runstart = runend+1;
1020   }
1021   /* Have been inside while, so discard last stacksize++ */
1022   --stacksize;
1023   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1024   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1025   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /*@C
1030   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1031   reorders a second array to match the first. The arrays need not be the same type.
1032 
1033   Not Collective
1034 
1035   Input Parameters:
1036 + n     - number of values
1037 . arr   - array to be sorted
1038 . asize - size in bytes of the datatype held in arr
1039 . barr  - array to be reordered
1040 . asize - size in bytes of the datatype held in barr
1041 . cmp   - function pointer to comparison function
1042 - ctx   - optional context to be passed to comparison function, NULL if not needed
1043 
1044   Output Parameters:
1045 + arr  - sorted array
1046 - barr - reordered array
1047 
1048   Notes:
1049   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1050   overlap.
1051 
1052   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1053   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1054  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1055  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1056 
1057   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1058   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1059   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1060   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1061   likely all contain similar data.
1062 
1063   Sample usage:
1064   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1065   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1066   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1067   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1068   increasing order
1069 
1070 .vb
1071   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1072     my_type l = *(my_type *) left, r = *(my_type *) right;
1073     return (l < r) ? -1 : (l > r);
1074   }
1075 .ve
1076   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1077   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1078   Then pass the function
1079 .vb
1080   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1081 .ve
1082 
1083   Fortran Notes:
1084   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1085   returns result. For example
1086 .vb
1087  subroutine CompareIntegers(left,right,ctx,result)
1088    implicit none
1089 
1090    PetscInt,intent(in) :: left, right
1091    type(UserCtx)       :: ctx
1092    integer,intent(out) :: result
1093 
1094    if (left < right) then
1095      result = -1
1096    else if (left == right) then
1097      result = 0
1098    else
1099      result = 1
1100    end if
1101    return
1102  end subroutine CompareIntegers
1103 .ve
1104 
1105   References:
1106   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1107 
1108   Level: developer
1109 
1110 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1111 @*/
1112 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1113 {
1114   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1115   PetscTimSortStack  runstack[128];
1116   PetscTimSortBuffer abuff, bbuff;
1117   PetscErrorCode     ierr;
1118   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1119    It is so unlikely that this limit is reached that this is __never__ checked for */
1120 
1121   PetscFunctionBegin;
1122   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1123    is a power of 2 or one plus a power of 2 */
1124   {
1125     PetscInt t = n, r = 0;
1126     /* r becomes 1 if the least significant bits contain at least one off bit */
1127     while (t >= 64) {
1128       r |= t & 1;
1129       t >>= 1;
1130     }
1131     minrun = t + r;
1132   }
1133   if (PetscDefined(USE_DEBUG)) {
1134     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1135     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1136   }
1137   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1138   abuff.size = (size_t) minrun*asize;
1139   abuff.maxsize = (size_t) n*asize;
1140   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1141   bbuff.size = (size_t) minrun*bsize;
1142   bbuff.maxsize = (size_t) n*bsize;
1143   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1144   while (runstart < n) {
1145     /* Check if additional entries are at least partially ordered and build natural run */
1146     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1147     runstack[stacksize].start = runstart;
1148     runstack[stacksize].size = runend-runstart+1;
1149     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1150     ++stacksize;
1151     runstart = runend+1;
1152   }
1153   /* Have been inside while, so discard last stacksize++ */
1154   --stacksize;
1155   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1156   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1157   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1158   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*@
1163    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1164 
1165    Not Collective
1166 
1167    Input Parameters:
1168 +  n   - number of values
1169 -  arr - array of integers
1170 
1171    Output Parameters:
1172 .  arr - sorted array of integers
1173 
1174    Notes:
1175    If the array is less than 64 entries long PetscSortInt() is automatically used.
1176 
1177    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1178    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1179    recommended that the user benchmark their code to see which routine is fastest.
1180 
1181    Level: intermediate
1182 
1183 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1184 @*/
1185 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1186 {
1187   PetscErrorCode ierr;
1188   PetscFunctionBegin;
1189   if (n <= 1) PetscFunctionReturn(0);
1190   PetscValidIntPointer(arr,2);
1191   if (n < 64) {
1192     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1193   } else {
1194     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 /*@
1200    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1201    array to match the first.
1202 
1203    Not Collective
1204 
1205    Input Parameters:
1206 +  n   - number of values
1207 .  arr1 - array of integers to be sorted
1208 -  arr2 - array of integers to be reordered
1209 
1210    Output Parameters:
1211 +  arr1 - sorted array of integers
1212 -  arr2 - reordered array of integers
1213 
1214    Notes:
1215    The arrays CANNOT overlap.
1216 
1217    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1218    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1219    recommended that the user benchmark their code to see which routine is fastest.
1220 
1221    Level: intermediate
1222 
1223 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1224 @*/
1225 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1226 {
1227   PetscErrorCode ierr;
1228   PetscFunctionBegin;
1229   if (n <= 1) PetscFunctionReturn(0);
1230   PetscValidIntPointer(arr1,2);
1231   PetscValidIntPointer(arr2,3);
1232   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1233   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*@
1238    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1239 
1240    Not Collective
1241 
1242    Input Parameters:
1243 +  n   - number of values
1244 -  arr - array of PetscMPIInts
1245 
1246    Output Parameters:
1247 .  arr - sorted array of integers
1248 
1249    Notes:
1250    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1251 
1252    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1253    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1254    recommended that the user benchmark their code to see which routine is fastest.
1255 
1256    Level: intermediate
1257 
1258 .seealso: PetscTimSort(), PetscSortMPIInt()
1259 @*/
1260 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1261 {
1262   PetscErrorCode  ierr;
1263 
1264   PetscFunctionBegin;
1265   if (n <= 1) PetscFunctionReturn(0);
1266   PetscValidIntPointer(arr,2);
1267   if (n < 64) {
1268     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1269   } else {
1270     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 /*@
1276    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1277    array to match the first.
1278 
1279    Not Collective
1280 
1281    Input Parameters:
1282 +  n   - number of values
1283 .  arr1 - array of integers to be sorted
1284 -  arr2 - array of integers to be reordered
1285 
1286    Output Parameters:
1287 +  arr1 - sorted array of integers
1288 -  arr2 - reordered array of integers
1289 
1290    Notes:
1291    The arrays CANNOT overlap.
1292 
1293    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1294    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1295    recommended that the user benchmark their code to see which routine is fastest.
1296 
1297    Level: intermediate
1298 
1299 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1300 @*/
1301 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1302 {
1303   PetscErrorCode ierr;
1304   PetscFunctionBegin;
1305   if (n <= 1) PetscFunctionReturn(0);
1306   PetscValidIntPointer(arr1,2);
1307   PetscValidIntPointer(arr2,3);
1308   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1309   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /*@
1314    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1315 
1316    Not Collective
1317 
1318    Input Parameters:
1319 +  n   - number of values
1320 -  arr - array of PetscReals
1321 
1322    Output Parameters:
1323 .  arr - sorted array of integers
1324 
1325    Notes:
1326    If the array is less than 64 entries long PetscSortReal() is automatically used.
1327 
1328    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1329    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1330    recommended that the user benchmark their code to see which routine is fastest.
1331 
1332    Level: intermediate
1333 
1334 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1335 @*/
1336 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1337 {
1338   PetscErrorCode  ierr;
1339 
1340   PetscFunctionBegin;
1341   if (n <= 1) PetscFunctionReturn(0);
1342   PetscValidRealPointer(arr,2);
1343   if (n < 64) {
1344     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1345   } else {
1346     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1347   }
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 /*@
1352    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1353    array of PetscInts to match the first.
1354 
1355    Not Collective
1356 
1357    Input Parameters:
1358 +  n   - number of values
1359 .  arr1 - array of PetscReals to be sorted
1360 -  arr2 - array of PetscReals to be reordered
1361 
1362    Output Parameters:
1363 +  arr1 - sorted array of PetscReals
1364 -  arr2 - reordered array of PetscInts
1365 
1366    Notes:
1367    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1368    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1369    recommended that the user benchmark their code to see which routine is fastest.
1370 
1371    Level: intermediate
1372 
1373 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1374 @*/
1375 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1376 {
1377   PetscErrorCode ierr;
1378   PetscFunctionBegin;
1379   if (n <= 1) PetscFunctionReturn(0);
1380   PetscValidRealPointer(arr1,2);
1381   PetscValidIntPointer(arr2,3);
1382   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1383   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386