xref: /petsc/src/sys/utils/sortso.c (revision 6913077d92d45dc85e2cd558d8cf01c8c45125fc)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 static inline void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ar, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ar, al, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(al, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, br, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(br, bl, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(bl, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 static inline void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   PetscAssert(r >= l,PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
543 } PetscTimSortStack;
544 #else
545 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
546 #endif
547 
548 typedef struct {
549   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
550   size_t size;
551   size_t maxsize;
552 } PetscTimSortBuffer;
553 
554 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
555 {
556   PetscFunctionBegin;
557   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
558   {
559     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
560     PetscErrorCode ierr;
561     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
562     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
563     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
564     buff->size = newMax;
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
570 {
571   PetscFunctionBegin;
572   for (;stacksize; --stacksize) {
573     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
574     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
575       PetscInt       l, m = stack[stacksize].start, r;
576       PetscErrorCode ierr;
577       /* Search A for B[0] insertion */
578       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
579       /* Search B for A[-1] insertion */
580       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
581       if (m-l <= r-m) {
582         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
583         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
584       } else {
585         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
586         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
587       }
588     }
589     /* Update A with merge */
590     stack[stacksize-1].size += stack[stacksize].size;
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596 {
597   PetscFunctionBegin;
598   for (;stacksize; --stacksize) {
599     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
601       PetscInt       l, m = stack[stacksize].start, r;
602       PetscErrorCode ierr;
603       /* Search A for B[0] insertion */
604       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
605       /* Search B for A[-1] insertion */
606       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
607       if (m-l <= r-m) {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       } else {
612         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
613         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
614         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
615       }
616     }
617     /* Update A with merge */
618     stack[stacksize-1].size += stack[stacksize].size;
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
624 {
625   PetscInt       i = *stacksize;
626   PetscErrorCode ierr;
627 
628   PetscFunctionBegin;
629   while (i) {
630     PetscInt l, m, r, itemp = i;
631 
632     if (i == 1) {
633       /* A = stack[i-1], B = stack[i] */
634       if (stack[i-1].size < stack[i].size) {
635         /* if A[-1] <= B[0] then sorted */
636         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
637           m = stack[i].start;
638           /* Search A for B[0] insertion */
639           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
640           /* Search B for A[-1] insertion */
641           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
642           if (m-l <= r-m) {
643             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
644             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
645           } else {
646             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
647             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
648           }
649         }
650         /* Update A with merge */
651         stack[i-1].size += stack[i].size;
652         --i;
653       }
654     } else {
655       /* i > 2, i.e. C exists
656        A = stack[i-2], B = stack[i-1], C = stack[i]; */
657       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
658         if (stack[i-2].size < stack[i].size) {
659           /* merge B into A, but if A[-1] <= B[0] then already sorted */
660           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
661             m = stack[i-1].start;
662             /* Search A for B[0] insertion */
663             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
664             /* Search B for A[-1] insertion */
665             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
666             if (m-l <= r-m) {
667               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
668               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
669             } else {
670               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
671               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
672             }
673           }
674           /* Update A with merge */
675           stack[i-2].size += stack[i-1].size;
676           /* Push C up the stack */
677           stack[i-1].start = stack[i].start;
678           stack[i-1].size = stack[i].size;
679         } else {
680           /* merge C into B */
681           mergeBC:
682           /* If B[-1] <= C[0] then... you know the drill */
683           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
684             m = stack[i].start;
685             /* Search B for C[0] insertion */
686             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
687             /* Search C for B[-1] insertion */
688             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
689             if (m-l <= r-m) {
690               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
691               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
692             } else {
693               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
694               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
695             }
696           }
697           /* Update B with merge */
698           stack[i-1].size += stack[i].size;
699         }
700         --i;
701       } else if (stack[i-1].size <= stack[i].size) {
702         /* merge C into B */
703         goto mergeBC;
704       }
705     }
706     if (itemp == i) break;
707   }
708   *stacksize = i;
709   PetscFunctionReturn(0);
710 }
711 
712 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
713 {
714   PetscInt       i = *stacksize;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   while (i) {
719     PetscInt l, m, r, itemp = i;
720 
721     if (i == 1) {
722       /* A = stack[i-1], B = stack[i] */
723       if (stack[i-1].size < stack[i].size) {
724         /* if A[-1] <= B[0] then sorted */
725         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
726           m = stack[i].start;
727           /* Search A for B[0] insertion */
728           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
729           /* Search B for A[-1] insertion */
730           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
731           if (m-l <= r-m) {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           } else {
736             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
737             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
738             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
739           }
740         }
741         /* Update A with merge */
742         stack[i-1].size += stack[i].size;
743         --i;
744       }
745     } else {
746       /* i > 2, i.e. C exists
747        A = stack[i-2], B = stack[i-1], C = stack[i]; */
748       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
749         if (stack[i-2].size < stack[i].size) {
750           /* merge B into A, but if A[-1] <= B[0] then already sorted */
751           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
752             m = stack[i-1].start;
753             /* Search A for B[0] insertion */
754             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
755             /* Search B for A[-1] insertion */
756             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
757             if (m-l <= r-m) {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             } else {
762               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
763               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
764               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
765             }
766           }
767           /* Update A with merge */
768           stack[i-2].size += stack[i-1].size;
769           /* Push C up the stack */
770           stack[i-1].start = stack[i].start;
771           stack[i-1].size = stack[i].size;
772         } else {
773           /* merge C into B */
774           mergeBC:
775           /* If B[-1] <= C[0] then... you know the drill */
776           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
777             m = stack[i].start;
778             /* Search B for C[0] insertion */
779             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
780             /* Search C for B[-1] insertion */
781             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
782             if (m-l <= r-m) {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             } else {
787               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
788               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
789               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
790             }
791           }
792           /* Update B with merge */
793           stack[i-1].size += stack[i].size;
794         }
795         --i;
796       } else if (stack[i-1].size <= stack[i].size) {
797         /* merge C into B */
798         goto mergeBC;
799       }
800     }
801     if (itemp == i) break;
802   }
803   *stacksize = i;
804   PetscFunctionReturn(0);
805 }
806 
807 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
808  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
809  binary insertion sort or regulat insertion sort */
810 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
811 {
812   const PetscInt re = PetscMin(runstart+minrun, n-1);
813   PetscInt       ri = runstart;
814 
815   PetscFunctionBegin;
816   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
817   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
819     ++ri;
820     while (ri < n-1) {
821       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
822       ++ri;
823     }
824     {
825       PetscInt lo = runstart, hi = ri;
826       for (; lo < hi; ++lo, --hi) {
827         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
828       }
829     }
830   } else {
831     ++ri;
832     while (ri < n-1) {
833       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
834       ++ri;
835     }
836   }
837   if (ri < re) {
838     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
839      binary search */
840     if (ri-runstart <= minrun >> 1) {
841       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
842       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
843     } else {
844       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845     }
846     *runend = re;
847   } else *runend = ri;
848   PetscFunctionReturn(0);
849 }
850 
851 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
852 {
853   const PetscInt re = PetscMin(runstart+minrun, n-1);
854   PetscInt       ri = runstart;
855 
856   PetscFunctionBegin;
857   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
858   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
859   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
860     ++ri;
861     while (ri < n-1) {
862       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
863       ++ri;
864     }
865     {
866       PetscInt lo = runstart, hi = ri;
867       for (; lo < hi; ++lo, --hi) {
868         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
869       }
870     }
871   } else {
872     ++ri;
873     while (ri < n-1) {
874       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
875       ++ri;
876     }
877   }
878   if (ri < re) {
879     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
880      binary search */
881     if (ri-runstart <= minrun >> 1) {
882       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
883       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
884     } else {
885       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
886     }
887     *runend = re;
888   } else *runend = ri;
889   PetscFunctionReturn(0);
890 }
891 
892 /*@C
893   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
894 
895   Not Collective
896 
897   Input Parameters:
898 + n    - number of values
899 . arr  - array to be sorted
900 . size - size in bytes of the datatype held in arr
901 . cmp  - function pointer to comparison function
902 - ctx  - optional context to be passed to comparison function, NULL if not needed
903 
904   Output Parameters:
905 . arr  - sorted array
906 
907   Notes:
908   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
909  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
910  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
911  do so it repeatedly triggers attempts throughout to merge adjacent runs.
912 
913   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
914   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
915   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
916   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
917   likely all contain similar data.
918 
919   Sample usage:
920   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
921   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
922   may also
923  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
924  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
925   order
926 
927 .vb
928   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
929     my_type l = *(my_type *) left, r = *(my_type *) right;
930     return (l < r) ? -1 : (l > r);
931   }
932 .ve
933   Note the context is unused here but you may use it to pass and subsequently access whatever information required
934   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
935   Then pass the function
936 .vb
937   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
938 .ve
939 
940   Fortran Notes:
941   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
942   returns result. For example
943 .vb
944  subroutine CompareIntegers(left,right,ctx,result)
945    implicit none
946 
947    PetscInt,intent(in) :: left, right
948    type(UserCtx)       :: ctx
949    integer,intent(out) :: result
950 
951    if (left < right) then
952      result = -1
953    else if (left == right) then
954      result = 0
955    else
956      result = 1
957    end if
958    return
959  end subroutine CompareIntegers
960 .ve
961 
962   References:
963 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt
964 
965   Level: developer
966 
967 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
968 @*/
969 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
970 {
971   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
972   PetscTimSortStack  runstack[128];
973   PetscTimSortBuffer buff;
974   PetscErrorCode     ierr;
975   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
976    It is so unlikely that this limit is reached that this is __never__ checked for */
977 
978   PetscFunctionBegin;
979   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
980    is a power of 2 or one plus a power of 2 */
981   {
982     PetscInt t = n, r = 0;
983     /* r becomes 1 if the least significant bits contain at least one off bit */
984     while (t >= 64) {
985       r |= t & 1;
986       t >>= 1;
987     }
988     minrun = t + r;
989   }
990   if (PetscDefined(USE_DEBUG)) {
991     ierr = PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);CHKERRQ(ierr);
992     if (n < 64) {
993       ierr = PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
994     } else PetscCheckFalse((minrun < 32) || (minrun > 65),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %" PetscInt_FMT " not in range (32,65)",minrun);
995   }
996   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
997   buff.size = (size_t) minrun*size;
998   buff.maxsize = (size_t) n*size;
999   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1000   while (runstart < n) {
1001     /* Check if additional entries are at least partially ordered and build natural run */
1002     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1003     runstack[stacksize].start = runstart;
1004     runstack[stacksize].size = runend-runstart+1;
1005     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1006     ++stacksize;
1007     runstart = runend+1;
1008   }
1009   /* Have been inside while, so discard last stacksize++ */
1010   --stacksize;
1011   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1012   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1013   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 /*@C
1018   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1019   reorders a second array to match the first. The arrays need not be the same type.
1020 
1021   Not Collective
1022 
1023   Input Parameters:
1024 + n     - number of values
1025 . asize - size in bytes of the datatype held in arr
1026 . bsize - size in bytes of the datatype held in barr
1027 . cmp   - function pointer to comparison function
1028 - ctx   - optional context to be passed to comparison function, NULL if not needed
1029 
1030   Input/Output Parameters:
1031 + arr  - array to be sorted, on output it is sorted
1032 - barr - array to be reordered, on output it is reordered
1033 
1034   Notes:
1035   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1036   overlap.
1037 
1038   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1039   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1040  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1041  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1042 
1043   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1044   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1045   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1046   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1047   likely all contain similar data.
1048 
1049   Sample usage:
1050   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1051   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1052   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1053   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1054   increasing order
1055 
1056 .vb
1057   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1058     my_type l = *(my_type *) left, r = *(my_type *) right;
1059     return (l < r) ? -1 : (l > r);
1060   }
1061 .ve
1062   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1063   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1064   Then pass the function
1065 .vb
1066   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1067 .ve
1068 
1069   Fortran Notes:
1070   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1071   returns result. For example
1072 .vb
1073  subroutine CompareIntegers(left,right,ctx,result)
1074    implicit none
1075 
1076    PetscInt,intent(in) :: left, right
1077    type(UserCtx)       :: ctx
1078    integer,intent(out) :: result
1079 
1080    if (left < right) then
1081      result = -1
1082    else if (left == right) then
1083      result = 0
1084    else
1085      result = 1
1086    end if
1087    return
1088  end subroutine CompareIntegers
1089 .ve
1090 
1091   References:
1092 . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1093 
1094   Level: developer
1095 
1096 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1097 @*/
1098 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1099 {
1100   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1101   PetscTimSortStack  runstack[128];
1102   PetscTimSortBuffer abuff, bbuff;
1103   PetscErrorCode     ierr;
1104   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1105    It is so unlikely that this limit is reached that this is __never__ checked for */
1106 
1107   PetscFunctionBegin;
1108   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1109    is a power of 2 or one plus a power of 2 */
1110   {
1111     PetscInt t = n, r = 0;
1112     /* r becomes 1 if the least significant bits contain at least one off bit */
1113     while (t >= 64) {
1114       r |= t & 1;
1115       t >>= 1;
1116     }
1117     minrun = t + r;
1118   }
1119   if (PetscDefined(USE_DEBUG)) {
1120     ierr = PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);CHKERRQ(ierr);
1121     PetscCheckFalse((n >= 64) && ((minrun < 32) || (minrun > 65)),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %" PetscInt_FMT " not in range (32,65)",minrun);
1122   }
1123   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1124   abuff.size = (size_t) minrun*asize;
1125   abuff.maxsize = (size_t) n*asize;
1126   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1127   bbuff.size = (size_t) minrun*bsize;
1128   bbuff.maxsize = (size_t) n*bsize;
1129   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1130   while (runstart < n) {
1131     /* Check if additional entries are at least partially ordered and build natural run */
1132     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1133     runstack[stacksize].start = runstart;
1134     runstack[stacksize].size = runend-runstart+1;
1135     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1136     ++stacksize;
1137     runstart = runend+1;
1138   }
1139   /* Have been inside while, so discard last stacksize++ */
1140   --stacksize;
1141   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1142   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1143   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1144   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 /*@
1149    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1150 
1151    Not Collective
1152 
1153    Input Parameters:
1154 +  n   - number of values
1155 -  arr - array of integers
1156 
1157    Output Parameters:
1158 .  arr - sorted array of integers
1159 
1160    Notes:
1161    If the array is less than 64 entries long PetscSortInt() is automatically used.
1162 
1163    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1164    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1165    recommended that the user benchmark their code to see which routine is fastest.
1166 
1167    Level: intermediate
1168 
1169 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1170 @*/
1171 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1172 {
1173   PetscErrorCode ierr;
1174   PetscFunctionBegin;
1175   if (n <= 1) PetscFunctionReturn(0);
1176   PetscValidIntPointer(arr,2);
1177   if (n < 64) {
1178     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1179   } else {
1180     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 /*@
1186    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1187    array to match the first.
1188 
1189    Not Collective
1190 
1191    Input Parameter:
1192 .  n   - number of values
1193 
1194    Input/Output Parameters:
1195 +  arr1 - array of integers to be sorted, modified on output
1196 -  arr2 - array of integers to be reordered, modified on output
1197 
1198    Notes:
1199    The arrays CANNOT overlap.
1200 
1201    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1202    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1203    recommended that the user benchmark their code to see which routine is fastest.
1204 
1205    Level: intermediate
1206 
1207 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1208 @*/
1209 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1210 {
1211   PetscErrorCode ierr;
1212   PetscFunctionBegin;
1213   if (n <= 1) PetscFunctionReturn(0);
1214   PetscValidIntPointer(arr1,2);
1215   PetscValidIntPointer(arr2,3);
1216   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1217   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 /*@
1222    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1223 
1224    Not Collective
1225 
1226    Input Parameters:
1227 +  n   - number of values
1228 -  arr - array of PetscMPIInts
1229 
1230    Output Parameters:
1231 .  arr - sorted array of integers
1232 
1233    Notes:
1234    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1235 
1236    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1237    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1238    recommended that the user benchmark their code to see which routine is fastest.
1239 
1240    Level: intermediate
1241 
1242 .seealso: PetscTimSort(), PetscSortMPIInt()
1243 @*/
1244 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1245 {
1246   PetscErrorCode  ierr;
1247 
1248   PetscFunctionBegin;
1249   if (n <= 1) PetscFunctionReturn(0);
1250   PetscValidIntPointer(arr,2);
1251   if (n < 64) {
1252     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1253   } else {
1254     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 /*@
1260    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1261    array to match the first.
1262 
1263    Not Collective
1264 
1265    Input Parameter:
1266 .  n   - number of values
1267 
1268    Input/Output Parameters:
1269 .  arr1 - array of integers to be sorted, modified on output
1270 -  arr2 - array of integers to be reordered, modified on output
1271 
1272    Notes:
1273    The arrays CANNOT overlap.
1274 
1275    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1276    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1277    recommended that the user benchmark their code to see which routine is fastest.
1278 
1279    Level: intermediate
1280 
1281 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1282 @*/
1283 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1284 {
1285   PetscErrorCode ierr;
1286   PetscFunctionBegin;
1287   if (n <= 1) PetscFunctionReturn(0);
1288   PetscValidIntPointer(arr1,2);
1289   PetscValidIntPointer(arr2,3);
1290   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1291   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 /*@
1296    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1297 
1298    Not Collective
1299 
1300    Input Parameters:
1301 +  n   - number of values
1302 -  arr - array of PetscReals
1303 
1304    Output Parameters:
1305 .  arr - sorted array of integers
1306 
1307    Notes:
1308    If the array is less than 64 entries long PetscSortReal() is automatically used.
1309 
1310    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1311    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1312    recommended that the user benchmark their code to see which routine is fastest.
1313 
1314    Level: intermediate
1315 
1316 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1317 @*/
1318 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1319 {
1320   PetscErrorCode  ierr;
1321 
1322   PetscFunctionBegin;
1323   if (n <= 1) PetscFunctionReturn(0);
1324   PetscValidRealPointer(arr,2);
1325   if (n < 64) {
1326     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1327   } else {
1328     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1329   }
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 /*@
1334    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1335    array of PetscInts to match the first.
1336 
1337    Not Collective
1338 
1339    Input Parameter:
1340 .  n   - number of values
1341 
1342    Input/Output Parameters:
1343 .  arr1 - array of PetscReals to be sorted, modified on output
1344 -  arr2 - array of PetscReals to be reordered, modified on output
1345 
1346    Notes:
1347    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1348    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1349    recommended that the user benchmark their code to see which routine is fastest.
1350 
1351    Level: intermediate
1352 
1353 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1354 @*/
1355 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1356 {
1357   PetscErrorCode ierr;
1358   PetscFunctionBegin;
1359   if (n <= 1) PetscFunctionReturn(0);
1360   PetscValidRealPointer(arr1,2);
1361   PetscValidIntPointer(arr2,3);
1362   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1363   ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366