xref: /petsc/src/sys/utils/sortso.c (revision 2cb003023780d2e6f0235b15c2c8c1bc50e4978a)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
543 
544 typedef struct {
545   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546   size_t size;
547   size_t maxsize;
548 } PetscTimSortBuffer;
549 
550 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551 {
552   PetscFunctionBegin;
553   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
554   {
555     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
556     PetscErrorCode ierr;
557     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
558     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
559     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
560     buff->size = newMax;
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566 {
567   PetscFunctionBegin;
568   for (;stacksize; --stacksize) {
569     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571       PetscInt       l, m = stack[stacksize].start, r;
572       PetscErrorCode ierr;
573       /* Search A for B[0] insertion */
574       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
575       /* Search B for A[-1] insertion */
576       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
577       if (m-l <= r-m) {
578         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
579         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
580       } else {
581         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
582         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
583       }
584     }
585     /* Update A with merge */
586     stack[stacksize-1].size += stack[stacksize].size;
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592 {
593   PetscFunctionBegin;
594   for (;stacksize; --stacksize) {
595     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597       PetscInt       l, m = stack[stacksize].start, r;
598       PetscErrorCode ierr;
599       /* Search A for B[0] insertion */
600       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
601       /* Search B for A[-1] insertion */
602       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
603       if (m-l <= r-m) {
604         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
605         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
606         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
607       } else {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       }
612     }
613     /* Update A with merge */
614     stack[stacksize-1].size += stack[stacksize].size;
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620 {
621   PetscInt       i = *stacksize;
622   PetscErrorCode ierr;
623 
624   PetscFunctionBegin;
625   while (i) {
626     PetscInt l, m, r, itemp = i;
627 
628     if (i == 1) {
629       /* A = stack[i-1], B = stack[i] */
630       if (stack[i-1].size < stack[i].size) {
631         /* if A[-1] <= B[0] then sorted */
632         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633           m = stack[i].start;
634           /* Search A for B[0] insertion */
635           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
636           /* Search B for A[-1] insertion */
637           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
638           if (m-l <= r-m) {
639             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
640             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
641           } else {
642             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
643             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
644           }
645         }
646         /* Update A with merge */
647         stack[i-1].size += stack[i].size;
648         --i;
649       }
650     } else {
651       /* i > 2, i.e. C exists
652        A = stack[i-2], B = stack[i-1], C = stack[i]; */
653       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654         if (stack[i-2].size < stack[i].size) {
655           /* merge B into A, but if A[-1] <= B[0] then already sorted */
656           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657             m = stack[i-1].start;
658             /* Search A for B[0] insertion */
659             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
660             /* Search B for A[-1] insertion */
661             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
662             if (m-l <= r-m) {
663               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
664               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
665             } else {
666               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
667               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
668             }
669           }
670           /* Update A with merge */
671           stack[i-2].size += stack[i-1].size;
672           /* Push C up the stack */
673           stack[i-1].start = stack[i].start;
674           stack[i-1].size = stack[i].size;
675         } else {
676           /* merge C into B */
677           mergeBC:
678           /* If B[-1] <= C[0] then... you know the drill */
679           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680             m = stack[i].start;
681             /* Search B for C[0] insertion */
682             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
683             /* Search C for B[-1] insertion */
684             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
685             if (m-l <= r-m) {
686               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
687               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
688             } else {
689               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
690               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
691             }
692           }
693           /* Update B with merge */
694           stack[i-1].size += stack[i].size;
695         }
696         --i;
697       } else if (stack[i-1].size <= stack[i].size) {
698         /* merge C into B */
699         goto mergeBC;
700       }
701     }
702     if (itemp == i) break;
703   }
704   *stacksize = i;
705   PetscFunctionReturn(0);
706 }
707 
708 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709 {
710   PetscInt       i = *stacksize;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714   while (i) {
715     PetscInt l, m, r, itemp = i;
716 
717     if (i == 1) {
718       /* A = stack[i-1], B = stack[i] */
719       if (stack[i-1].size < stack[i].size) {
720         /* if A[-1] <= B[0] then sorted */
721         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722           m = stack[i].start;
723           /* Search A for B[0] insertion */
724           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
725           /* Search B for A[-1] insertion */
726           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
727           if (m-l <= r-m) {
728             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
729             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
730             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
731           } else {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           }
736         }
737         /* Update A with merge */
738         stack[i-1].size += stack[i].size;
739         --i;
740       }
741     } else {
742       /* i > 2, i.e. C exists
743        A = stack[i-2], B = stack[i-1], C = stack[i]; */
744       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745         if (stack[i-2].size < stack[i].size) {
746           /* merge B into A, but if A[-1] <= B[0] then already sorted */
747           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748             m = stack[i-1].start;
749             /* Search A for B[0] insertion */
750             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
751             /* Search B for A[-1] insertion */
752             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
753             if (m-l <= r-m) {
754               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
755               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
756               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
757             } else {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             }
762           }
763           /* Update A with merge */
764           stack[i-2].size += stack[i-1].size;
765           /* Push C up the stack */
766           stack[i-1].start = stack[i].start;
767           stack[i-1].size = stack[i].size;
768         } else {
769           /* merge C into B */
770           mergeBC:
771           /* If B[-1] <= C[0] then... you know the drill */
772           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773             m = stack[i].start;
774             /* Search B for C[0] insertion */
775             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
776             /* Search C for B[-1] insertion */
777             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
778             if (m-l <= r-m) {
779               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
780               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
781               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
782             } else {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             }
787           }
788           /* Update B with merge */
789           stack[i-1].size += stack[i].size;
790         }
791         --i;
792       } else if (stack[i-1].size <= stack[i].size) {
793         /* merge C into B */
794         goto mergeBC;
795       }
796     }
797     if (itemp == i) break;
798   }
799   *stacksize = i;
800   PetscFunctionReturn(0);
801 }
802 
803 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805  binary insertion sort or regulat insertion sort */
806 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807 {
808   const PetscInt re = PetscMin(runstart+minrun, n-1);
809   PetscInt       ri = runstart;
810 
811   PetscFunctionBegin;
812   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
813   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815     ++ri;
816     while (ri < n-1) {
817       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818       ++ri;
819     }
820     {
821       PetscInt lo = runstart, hi = ri;
822       for (; lo < hi; ++lo, --hi) {
823         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824       }
825     }
826   } else {
827     ++ri;
828     while (ri < n-1) {
829       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830       ++ri;
831     }
832   }
833 #if defined(PETSC_USE_DEBUG)
834   {
835     PetscErrorCode ierr;
836     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
837   }
838 #endif
839   if (ri < re) {
840     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841      binary search */
842     if (ri-runstart <= minrun >> 1) {
843       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845     } else {
846       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847     }
848     *runend = re;
849   } else *runend = ri;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854 {
855   const PetscInt re = PetscMin(runstart+minrun, n-1);
856   PetscInt       ri = runstart;
857 
858   PetscFunctionBegin;
859   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
860   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862     ++ri;
863     while (ri < n-1) {
864       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865       ++ri;
866     }
867     {
868       PetscInt lo = runstart, hi = ri;
869       for (; lo < hi; ++lo, --hi) {
870         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871       }
872     }
873   } else {
874     ++ri;
875     while (ri < n-1) {
876       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877       ++ri;
878     }
879   }
880 #if defined(PETSC_USE_DEBUG)
881   {
882     PetscErrorCode ierr;
883     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
884   }
885 #endif
886   if (ri < re) {
887     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888      binary search */
889     if (ri-runstart <= minrun >> 1) {
890       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892     } else {
893       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894     }
895     *runend = re;
896   } else *runend = ri;
897   PetscFunctionReturn(0);
898 }
899 
900 /*@C
901   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
902 
903   Not Collective
904 
905   Input Parameters:
906 + n    - number of values
907 . arr  - array to be sorted
908 . size - size in bytes of the datatype held in arr
909 . cmp  - function pointer to comparison function
910 - ctx  - optional context to be passed to comparison function, NULL if not needed
911 
912   Output Parameters:
913 . arr  - sorted array
914 
915   Notes:
916   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919  do so it repeatedly triggers attempts throughout to merge adjacent runs.
920 
921   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925   likely all contain similar data.
926 
927   Sample usage:
928   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930   may also
931  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933   order
934 
935 .vb
936   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937     my_type l = *(my_type *) left, r = *(my_type *) right;
938     return (l < r) ? -1 : (l > r);
939   }
940 .ve
941   Note the context is unused here but you may use it to pass and subsequently access whatever information required
942   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943   Then pass the function
944 .vb
945   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946 .ve
947 
948   Fortran Notes:
949   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950   returns result. For example
951 .vb
952  subroutine CompareIntegers(left,right,ctx,result)
953    implicit none
954 
955    PetscInt,intent(in) :: left, right
956    type(UserCtx)       :: ctx
957    integer,intent(out) :: result
958 
959    if (left < right) then
960      result = -1
961    else if (left == right) then
962      result = 0
963    else
964      result = 1
965    end if
966    return
967  end subroutine CompareIntegers
968 .ve
969 
970   References:
971   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
972 
973   Level: developer
974 
975 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976 @*/
977 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978 {
979   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
980   PetscTimSortStack  runstack[128];
981   PetscTimSortBuffer buff;
982   PetscErrorCode     ierr;
983   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984    It is so unlikely that this limit is reached that this is __never__ checked for */
985 
986   PetscFunctionBegin;
987   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988    is a power of 2 or one plus a power of 2 */
989   {
990     PetscInt t = n, r = 0;
991     /* r becomes 1 if the least significant bits contain at least one off bit */
992     while (t >= 64) {
993       r |= t & 1;
994       t >>= 1;
995     }
996     minrun = t + r;
997   }
998   if (PetscDefined(USE_DEBUG)) {
999     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1000     if (n < 64) {
1001       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1002     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003   }
1004   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1005   buff.size = (size_t) minrun*size;
1006   buff.maxsize = (size_t) n*size;
1007   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008   while (runstart < n) {
1009     /* Check if additional entries are at least partially ordered and build natural run */
1010     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1011     runstack[stacksize].start = runstart;
1012     runstack[stacksize].size = runend-runstart+1;
1013     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1014     ++stacksize;
1015     runstart = runend+1;
1016   }
1017   /* Have been inside while, so discard last stacksize++ */
1018   --stacksize;
1019   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1020   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1021   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 /*@C
1026   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027   reorders a second array to match the first. The arrays need not be the same type.
1028 
1029   Not Collective
1030 
1031   Input Parameters:
1032 + n     - number of values
1033 . arr   - array to be sorted
1034 . asize - size in bytes of the datatype held in arr
1035 . barr  - array to be reordered
1036 . asize - size in bytes of the datatype held in barr
1037 . cmp   - function pointer to comparison function
1038 - ctx   - optional context to be passed to comparison function, NULL if not needed
1039 
1040   Output Parameters:
1041 + arr  - sorted array
1042 - barr - reordered array
1043 
1044   Notes:
1045   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046   overlap.
1047 
1048   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1052 
1053   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057   likely all contain similar data.
1058 
1059   Sample usage:
1060   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064   increasing order
1065 
1066 .vb
1067   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068     my_type l = *(my_type *) left, r = *(my_type *) right;
1069     return (l < r) ? -1 : (l > r);
1070   }
1071 .ve
1072   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074   Then pass the function
1075 .vb
1076   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077 .ve
1078 
1079 
1080   Fortran Notes:
1081   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1082   returns result. For example
1083 .vb
1084  subroutine CompareIntegers(left,right,ctx,result)
1085    implicit none
1086 
1087    PetscInt,intent(in) :: left, right
1088    type(UserCtx)       :: ctx
1089    integer,intent(out) :: result
1090 
1091    if (left < right) then
1092      result = -1
1093    else if (left == right) then
1094      result = 0
1095    else
1096      result = 1
1097    end if
1098    return
1099  end subroutine CompareIntegers
1100 .ve
1101 
1102   References:
1103   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1104 
1105   Level: developer
1106 
1107 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1108 @*/
1109 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1110 {
1111   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1112   PetscTimSortStack  runstack[128];
1113   PetscTimSortBuffer abuff, bbuff;
1114   PetscErrorCode     ierr;
1115   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1116    It is so unlikely that this limit is reached that this is __never__ checked for */
1117 
1118   PetscFunctionBegin;
1119   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1120    is a power of 2 or one plus a power of 2 */
1121   {
1122     PetscInt t = n, r = 0;
1123     /* r becomes 1 if the least significant bits contain at least one off bit */
1124     while (t >= 64) {
1125       r |= t & 1;
1126       t >>= 1;
1127     }
1128     minrun = t + r;
1129   }
1130   if (PetscDefined(USE_DEBUG)) {
1131     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1132     if ((n >= 64) && ((minrun < 32) || (minrun > 65))) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1133   }
1134   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1135   abuff.size = (size_t) minrun*asize;
1136   abuff.maxsize = (size_t) n*asize;
1137   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1138   bbuff.size = (size_t) minrun*bsize;
1139   bbuff.maxsize = (size_t) n*bsize;
1140   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1141   while (runstart < n) {
1142     /* Check if additional entries are at least partially ordered and build natural run */
1143     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1144     runstack[stacksize].start = runstart;
1145     runstack[stacksize].size = runend-runstart+1;
1146     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1147     ++stacksize;
1148     runstart = runend+1;
1149   }
1150   /* Have been inside while, so discard last stacksize++ */
1151   --stacksize;
1152   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1153   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1154   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1155   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 /*@
1160    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1161 
1162    Not Collective
1163 
1164    Input Parameters:
1165 +  n   - number of values
1166 -  arr - array of integers
1167 
1168    Output Parameters:
1169 .  arr - sorted array of integers
1170 
1171    Notes:
1172    If the array is less than 64 entries long PetscSortInt() is automatically used.
1173 
1174    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1175    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1176    recomended that the user benchmark their code to see which routine is fastest.
1177 
1178    Level: intermediate
1179 
1180 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1181 @*/
1182 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1183 {
1184   PetscErrorCode ierr;
1185   PetscFunctionBegin;
1186   if (n <= 1) PetscFunctionReturn(0);
1187   PetscValidIntPointer(arr,2);
1188   if (n < 64) {
1189     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 /*@
1197    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1198    array to match the first.
1199 
1200    Not Collective
1201 
1202    Input Parameters:
1203 +  n   - number of values
1204 .  arr1 - array of integers to be sorted
1205 -  arr2 - array of integers to be reordered
1206 
1207    Output Parameters:
1208 +  arr1 - sorted array of integers
1209 -  arr2 - reordered array of integers
1210 
1211    Notes:
1212    The arrays CANNOT overlap.
1213 
1214    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1215    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1216    recomended that the user benchmark their code to see which routine is fastest.
1217 
1218    Level: intermediate
1219 
1220 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1221 @*/
1222 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1223 {
1224   PetscErrorCode ierr;
1225   PetscFunctionBegin;
1226   if (n <= 1) PetscFunctionReturn(0);
1227   PetscValidIntPointer(arr1,2);
1228   PetscValidIntPointer(arr2,3);
1229   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1230   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 /*@
1235    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1236 
1237    Not Collective
1238 
1239    Input Parameters:
1240 +  n   - number of values
1241 -  arr - array of PetscMPIInts
1242 
1243    Output Parameters:
1244 .  arr - sorted array of integers
1245 
1246    Notes:
1247    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1248 
1249    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1250    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1251    recomended that the user benchmark their code to see which routine is fastest.
1252 
1253    Level: intermediate
1254 
1255 .seealso: PetscTimSort(), PetscSortMPIInt()
1256 @*/
1257 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1258 {
1259   PetscErrorCode  ierr;
1260 
1261   PetscFunctionBegin;
1262   if (n <= 1) PetscFunctionReturn(0);
1263   PetscValidIntPointer(arr,2);
1264   if (n < 64) {
1265     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1266   } else {
1267     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 /*@
1273    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1274    array to match the first.
1275 
1276    Not Collective
1277 
1278    Input Parameters:
1279 +  n   - number of values
1280 .  arr1 - array of integers to be sorted
1281 -  arr2 - array of integers to be reordered
1282 
1283    Output Parameters:
1284 +  arr1 - sorted array of integers
1285 -  arr2 - reordered array of integers
1286 
1287    Notes:
1288    The arrays CANNOT overlap.
1289 
1290    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1291    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1292    recomended that the user benchmark their code to see which routine is fastest.
1293 
1294    Level: intermediate
1295 
1296 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1297 @*/
1298 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1299 {
1300   PetscErrorCode ierr;
1301   PetscFunctionBegin;
1302   if (n <= 1) PetscFunctionReturn(0);
1303   PetscValidIntPointer(arr1,2);
1304   PetscValidIntPointer(arr2,3);
1305   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1306   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 /*@
1311    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1312 
1313    Not Collective
1314 
1315    Input Parameters:
1316 +  n   - number of values
1317 -  arr - array of PetscReals
1318 
1319    Output Parameters:
1320 .  arr - sorted array of integers
1321 
1322    Notes:
1323    If the array is less than 64 entries long PetscSortReal() is automatically used.
1324 
1325    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1326    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1327    recomended that the user benchmark their code to see which routine is fastest.
1328 
1329    Level: intermediate
1330 
1331 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1332 @*/
1333 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1334 {
1335   PetscErrorCode  ierr;
1336 
1337   PetscFunctionBegin;
1338   if (n <= 1) PetscFunctionReturn(0);
1339   PetscValidRealPointer(arr,2);
1340   if (n < 64) {
1341     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 /*@
1349    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1350    array of PetscInts to match the first.
1351 
1352    Not Collective
1353 
1354    Input Parameters:
1355 +  n   - number of values
1356 .  arr1 - array of PetscReals to be sorted
1357 -  arr2 - array of PetscReals to be reordered
1358 
1359    Output Parameters:
1360 +  arr1 - sorted array of PetscReals
1361 -  arr2 - reordered array of PetscInts
1362 
1363    Notes:
1364    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1365    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1366    recomended that the user benchmark their code to see which routine is fastest.
1367 
1368    Level: intermediate
1369 
1370 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1371 @*/
1372 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1373 {
1374   PetscErrorCode ierr;
1375   PetscFunctionBegin;
1376   if (n <= 1) PetscFunctionReturn(0);
1377   PetscValidRealPointer(arr1,2);
1378   PetscValidIntPointer(arr2,3);
1379   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1380   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383