xref: /petsc/src/sys/utils/sortso.c (revision fd5a855563b981e3d8ad570cd3f080c8adb7dcc2)
1 #include <petscsys.h> /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED PetscCtx ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED PetscCtx ctx)
11 {
12   PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED PetscCtx ctx)
17 {
18   PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__)
28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ar, asize);
39   __builtin_memmove(ar, al, asize);
40   __builtin_memcpy(al, t, asize);
41   __builtin_memcpy(t, br, bsize);
42   __builtin_memmove(br, bl, bsize);
43   __builtin_memcpy(bl, t, bsize);
44   return;
45 }
46 
47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, bsrc, bsize);
57   return;
58 }
59 
60 static inline void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 #else
73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscFunctionBegin;
76   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size));
77   PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size));
78   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size));
79   PetscFunctionReturnVoid();
80 }
81 
82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
83 {
84   PetscFunctionBegin;
85   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize));
86   PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize));
87   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize));
88   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize));
89   PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize));
90   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize));
91   PetscFunctionReturnVoid();
92 }
93 
94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
95 {
96   PetscFunctionBegin;
97   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size));
98   PetscFunctionReturnVoid();
99 }
100 
101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
102 {
103   PetscFunctionBegin;
104   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize));
105   PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize));
106   PetscFunctionReturnVoid();
107 }
108 
109 static inline void Petsc_memmove(char *dest, const char *src, size_t size)
110 {
111   PetscFunctionBegin;
112   PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size));
113   PetscFunctionReturnVoid();
114 }
115 
116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
117 {
118   PetscFunctionBegin;
119   PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize));
120   PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize));
121   PetscFunctionReturnVoid();
122 }
123 #endif
124 
125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
126  x. Output also inclusive.
127 
128  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
129 
130    {0,1,2,3,4,5}
131 
132    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
133   */
134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
135 {
136   PetscInt last = l, k = 1, mid, cur = l + 1;
137 
138   PetscFunctionBegin;
139   *m = l;
140   PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l);
141   if ((*cmp)(x, arr + r * size, ctx) >= 0) {
142     *m = r;
143     PetscFunctionReturn(PETSC_SUCCESS);
144   }
145   if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS);
146   while (PETSC_TRUE) {
147     if (PetscUnlikely(cur > r)) {
148       cur = r;
149       break;
150     }
151     if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr) + mid * size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r - 1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l);
178   if ((*cmp)(x, arr + l * size, ctx) <= 0) {
179     *m = l;
180     PetscFunctionReturn(PETSC_SUCCESS);
181   }
182   if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS);
183   while (PETSC_TRUE) {
184     if (PetscUnlikely(cur < l)) {
185       cur = l;
186       break;
187     }
188     if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break;
189     last = cur;
190     cur -= (k <<= 1) + 1;
191     ++k;
192   }
193   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
194   while (last > cur + 1) {
195     mid = last - ((last - cur) >> 1);
196     if ((*cmp)(x, (arr) + mid * size, ctx) > 0) {
197       cur = mid;
198     } else {
199       last = mid;
200     }
201   }
202   *m = cur;
203   PetscFunctionReturn(PETSC_SUCCESS);
204 }
205 
206 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
207  complete array, left is first index of left array, mid is first index of right array, right is last index of right
208  array */
209 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
210 {
211   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
212   const PetscInt llen = mid - left;
213 
214   PetscFunctionBegin;
215   Petsc_memcpy(tarr, arr + (left * size), llen * size);
216   while ((i < llen) && (j <= right)) {
217     if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) {
218       Petsc_memcpy(arr + (k * size), tarr + (i * size), size);
219       ++k;
220       gallopright = 0;
221       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
222         PetscInt l1, l2, diff1, diff2;
223         do {
224           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
225           /* search temp for right[j], can move up to that of temp into arr immediately */
226           PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1));
227           diff1 = l1 - i;
228           Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
229           k += diff1;
230           i = l1;
231           /* search right for temp[i], can move up to that many of right into arr */
232           PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2));
233           diff2 = l2 - j;
234           Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size);
235           k += diff2;
236           j = l2;
237           if (i >= llen || j > right) break;
238         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
239         ++MIN_GALLOP_GLOBAL;
240       }
241     } else {
242       Petsc_memmove(arr + (k * size), arr + (j * size), size);
243       ++k;
244       gallopleft = 0;
245       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
246         PetscInt l1, l2, diff1, diff2;
247         do {
248           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
249           /* search right for temp[i], can move up to that many of right into arr */
250           PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2));
251           diff2 = l2 - j;
252           Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size);
253           k += diff2;
254           j = l2;
255           /* search temp for right[j], can copy up to that of temp into arr immediately */
256           PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1));
257           diff1 = l1 - i;
258           Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
259           k += diff1;
260           i = l1;
261           if (i >= llen || j > right) break;
262         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
263         ++MIN_GALLOP_GLOBAL;
264       }
265     }
266   }
267   if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size);
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
272 {
273   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
274   const PetscInt llen = mid - left;
275 
276   PetscFunctionBegin;
277   Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize);
278   while ((i < llen) && (j <= right)) {
279     if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) {
280       Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize);
281       ++k;
282       gallopright = 0;
283       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
284         PetscInt l1, l2, diff1, diff2;
285         do {
286           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
287           /* search temp for right[j], can move up to that of temp into arr immediately */
288           PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1));
289           diff1 = l1 - i;
290           Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
291           k += diff1;
292           i = l1;
293           /* search right for temp[i], can move up to that many of right into arr */
294           PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2));
295           diff2 = l2 - j;
296           Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
297           k += diff2;
298           j = l2;
299           if (i >= llen || j > right) break;
300         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
301         ++MIN_GALLOP_GLOBAL;
302       }
303     } else {
304       Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize);
305       ++k;
306       gallopleft = 0;
307       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
308         PetscInt l1, l2, diff1, diff2;
309         do {
310           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
311           /* search right for temp[i], can move up to that many of right into arr */
312           PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2));
313           diff2 = l2 - j;
314           Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
315           k += diff2;
316           j = l2;
317           /* search temp for right[j], can copy up to that of temp into arr immediately */
318           PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1));
319           diff1 = l1 - i;
320           Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
321           k += diff1;
322           i = l1;
323           if (i >= llen || j > right) break;
324         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
325         ++MIN_GALLOP_GLOBAL;
326       }
327     }
328   }
329   if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize);
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
334  complete array, left is first index of left array, mid is first index of right array, right is last index of right
335  array */
336 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
337 {
338   PetscInt       i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
339   const PetscInt rlen = right - mid + 1;
340 
341   PetscFunctionBegin;
342   Petsc_memcpy(tarr, (arr) + mid * size, rlen * size);
343   while ((i >= 0) && (j >= left)) {
344     if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) {
345       Petsc_memcpy((arr) + k * size, (tarr) + i * size, size);
346       --k;
347       gallopleft = 0;
348       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
349         PetscInt l1, l2, diff1, diff2;
350         do {
351           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
352           /* search temp for left[j], can copy up to that many of temp into arr */
353           PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1));
354           diff1 = i - l1;
355           Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
356           k -= diff1;
357           i = l1;
358           /* search left for temp[i], can move up to that many of left up arr */
359           PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2));
360           diff2 = j - l2;
361           Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
362           k -= diff2;
363           j = l2;
364           if (i < 0 || j < left) break;
365         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
366         ++MIN_GALLOP_GLOBAL;
367       }
368     } else {
369       Petsc_memmove((arr) + k * size, (arr) + j * size, size);
370       --k;
371       gallopright = 0;
372       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
373         PetscInt l1, l2, diff1, diff2;
374         do {
375           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
376           /* search left for temp[i], can move up to that many of left up arr */
377           PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2));
378           diff2 = j - l2;
379           Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
380           k -= diff2;
381           j = l2;
382           /* search temp for left[j], can copy up to that many of temp into arr */
383           PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1));
384           diff1 = i - l1;
385           Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
386           k -= diff1;
387           i = l1;
388           if (i < 0 || j < left) break;
389         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
390         ++MIN_GALLOP_GLOBAL;
391       }
392     }
393   }
394   if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size);
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
398 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
399 {
400   PetscInt       i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
401   const PetscInt rlen = right - mid + 1;
402 
403   PetscFunctionBegin;
404   Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize);
405   while ((i >= 0) && (j >= left)) {
406     if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) {
407       Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize);
408       --k;
409       gallopleft = 0;
410       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
411         PetscInt l1, l2, diff1, diff2;
412         do {
413           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
414           /* search temp for left[j], can copy up to that many of temp into arr */
415           PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1));
416           diff1 = i - l1;
417           Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
418           k -= diff1;
419           i = l1;
420           /* search left for temp[i], can move up to that many of left up arr */
421           PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2));
422           diff2 = j - l2;
423           Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
424           k -= diff2;
425           j = l2;
426           if (i < 0 || j < left) break;
427         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
428         ++MIN_GALLOP_GLOBAL;
429       }
430     } else {
431       Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize);
432       --k;
433       gallopright = 0;
434       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
435         PetscInt l1, l2, diff1, diff2;
436         do {
437           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
438           /* search left for temp[i], can move up to that many of left up arr */
439           PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2));
440           diff2 = j - l2;
441           Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
442           k -= diff2;
443           j = l2;
444           /* search temp for left[j], can copy up to that many of temp into arr */
445           PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1));
446           diff1 = i - l1;
447           Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
448           k -= diff1;
449           i = l1;
450           if (i < 0 || j < left) break;
451         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
452         ++MIN_GALLOP_GLOBAL;
453       }
454     }
455   }
456   if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize);
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
460 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
461  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
462 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
463 {
464   PetscInt i = start == left ? start + 1 : start;
465 
466   PetscFunctionBegin;
467   for (; i <= right; ++i) {
468     PetscInt j = i - 1;
469     Petsc_memcpy(tarr, arr + (i * size), size);
470     while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) {
471       COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size);
472       --j;
473     }
474     Petsc_memcpy((arr) + (j + 1) * size, tarr, size);
475   }
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
480 {
481   PetscInt i = start == left ? start + 1 : start;
482 
483   PetscFunctionBegin;
484   for (; i <= right; ++i) {
485     PetscInt j = i - 1;
486     Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
487     while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) {
488       COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize);
489       --j;
490     }
491     Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize);
492   }
493   PetscFunctionReturn(PETSC_SUCCESS);
494 }
495 
496 /* See PetscInsertionSort_Private */
497 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
498 {
499   PetscInt i = start == left ? start + 1 : start;
500 
501   PetscFunctionBegin;
502   for (; i <= right; ++i) {
503     PetscInt l = left, r = i;
504     Petsc_memcpy(tarr, arr + (i * size), size);
505     do {
506       const PetscInt m = l + ((r - l) >> 1);
507       if ((*cmp)(tarr, arr + (m * size), ctx) < 0) {
508         r = m;
509       } else {
510         l = m + 1;
511       }
512     } while (l < r);
513     Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size);
514     Petsc_memcpy(arr + (l * size), tarr, size);
515   }
516   PetscFunctionReturn(PETSC_SUCCESS);
517 }
518 
519 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
520 {
521   PetscInt i = start == left ? start + 1 : start;
522 
523   PetscFunctionBegin;
524   for (; i <= right; ++i) {
525     PetscInt l = left, r = i;
526     Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
527     do {
528       const PetscInt m = l + ((r - l) >> 1);
529       if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) {
530         r = m;
531       } else {
532         l = m + 1;
533       }
534     } while (l < r);
535     Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize);
536     Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize);
537   }
538   PetscFunctionReturn(PETSC_SUCCESS);
539 }
540 
541 typedef struct {
542   PetscInt size;
543   PetscInt start;
544 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
545 } PetscTimSortStack;
546 #else
547 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt));
548 #endif
549 
550 typedef struct {
551   char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
552   size_t    size;
553   size_t    maxsize;
554 } PetscTimSortBuffer;
555 
556 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
557 {
558   PetscFunctionBegin;
559   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS);
560   {
561     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
562     size_t newMax = PetscMin(newSize * newSize, buff->maxsize);
563     PetscCall(PetscFree(buff->ptr));
564     PetscCall(PetscMalloc1(newMax, &buff->ptr));
565     buff->size = newMax;
566   }
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
570 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
571 {
572   PetscFunctionBegin;
573   for (; stacksize; --stacksize) {
574     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
575     if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) {
576       PetscInt l, m = stack[stacksize].start, r;
577       /* Search A for B[0] insertion */
578       PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l));
579       /* Search B for A[-1] insertion */
580       PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r));
581       if (m - l <= r - m) {
582         PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
583         PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
584       } else {
585         PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
586         PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
587       }
588     }
589     /* Update A with merge */
590     stack[stacksize - 1].size += stack[stacksize].size;
591   }
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596 {
597   PetscFunctionBegin;
598   for (; stacksize; --stacksize) {
599     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600     if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) {
601       PetscInt l, m = stack[stacksize].start, r;
602       /* Search A for B[0] insertion */
603       PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l));
604       /* Search B for A[-1] insertion */
605       PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r));
606       if (m - l <= r - m) {
607         PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
608         PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
609         PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
610       } else {
611         PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
612         PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
613         PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
614       }
615     }
616     /* Update A with merge */
617     stack[stacksize - 1].size += stack[stacksize].size;
618   }
619   PetscFunctionReturn(PETSC_SUCCESS);
620 }
621 
622 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
623 {
624   PetscInt i = *stacksize;
625 
626   PetscFunctionBegin;
627   while (i) {
628     PetscInt l, m, r, itemp = i;
629 
630     if (i == 1) {
631       /* A = stack[i-1], B = stack[i] */
632       if (stack[i - 1].size < stack[i].size) {
633         /* if A[-1] <= B[0] then sorted */
634         if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
635           m = stack[i].start;
636           /* Search A for B[0] insertion */
637           PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l));
638           /* Search B for A[-1] insertion */
639           PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r));
640           if (m - l <= r - m) {
641             PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
642             PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
643           } else {
644             PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
645             PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
646           }
647         }
648         /* Update A with merge */
649         stack[i - 1].size += stack[i].size;
650         --i;
651       }
652     } else {
653       /* i > 2, i.e. C exists
654        A = stack[i-2], B = stack[i-1], C = stack[i]; */
655       if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
656         if (stack[i - 2].size < stack[i].size) {
657           /* merge B into A, but if A[-1] <= B[0] then already sorted */
658           if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) {
659             m = stack[i - 1].start;
660             /* Search A for B[0] insertion */
661             PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l));
662             /* Search B for A[-1] insertion */
663             PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r));
664             if (m - l <= r - m) {
665               PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
666               PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
667             } else {
668               PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
669               PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
670             }
671           }
672           /* Update A with merge */
673           stack[i - 2].size += stack[i - 1].size;
674           /* Push C up the stack */
675           stack[i - 1].start = stack[i].start;
676           stack[i - 1].size  = stack[i].size;
677         } else {
678         /* merge C into B */
679         mergeBC:
680           /* If B[-1] <= C[0] then... you know the drill */
681           if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
682             m = stack[i].start;
683             /* Search B for C[0] insertion */
684             PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l));
685             /* Search C for B[-1] insertion */
686             PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r));
687             if (m - l <= r - m) {
688               PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
689               PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
690             } else {
691               PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
692               PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
693             }
694           }
695           /* Update B with merge */
696           stack[i - 1].size += stack[i].size;
697         }
698         --i;
699       } else if (stack[i - 1].size <= stack[i].size) {
700         /* merge C into B */
701         goto mergeBC;
702       }
703     }
704     if (itemp == i) break;
705   }
706   *stacksize = i;
707   PetscFunctionReturn(PETSC_SUCCESS);
708 }
709 
710 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
711 {
712   PetscInt i = *stacksize;
713 
714   PetscFunctionBegin;
715   while (i) {
716     PetscInt l, m, r, itemp = i;
717 
718     if (i == 1) {
719       /* A = stack[i-1], B = stack[i] */
720       if (stack[i - 1].size < stack[i].size) {
721         /* if A[-1] <= B[0] then sorted */
722         if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
723           m = stack[i].start;
724           /* Search A for B[0] insertion */
725           PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l));
726           /* Search B for A[-1] insertion */
727           PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r));
728           if (m - l <= r - m) {
729             PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
730             PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
731             PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
732           } else {
733             PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
734             PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
735             PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
736           }
737         }
738         /* Update A with merge */
739         stack[i - 1].size += stack[i].size;
740         --i;
741       }
742     } else {
743       /* i > 2, i.e. C exists
744        A = stack[i-2], B = stack[i-1], C = stack[i]; */
745       if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
746         if (stack[i - 2].size < stack[i].size) {
747           /* merge B into A, but if A[-1] <= B[0] then already sorted */
748           if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) {
749             m = stack[i - 1].start;
750             /* Search A for B[0] insertion */
751             PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l));
752             /* Search B for A[-1] insertion */
753             PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r));
754             if (m - l <= r - m) {
755               PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
756               PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
757               PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
758             } else {
759               PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
760               PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
761               PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
762             }
763           }
764           /* Update A with merge */
765           stack[i - 2].size += stack[i - 1].size;
766           /* Push C up the stack */
767           stack[i - 1].start = stack[i].start;
768           stack[i - 1].size  = stack[i].size;
769         } else {
770         /* merge C into B */
771         mergeBC:
772           /* If B[-1] <= C[0] then... you know the drill */
773           if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
774             m = stack[i].start;
775             /* Search B for C[0] insertion */
776             PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l));
777             /* Search C for B[-1] insertion */
778             PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r));
779             if (m - l <= r - m) {
780               PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
781               PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
782               PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
783             } else {
784               PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
785               PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
786               PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
787             }
788           }
789           /* Update B with merge */
790           stack[i - 1].size += stack[i].size;
791         }
792         --i;
793       } else if (stack[i - 1].size <= stack[i].size) {
794         /* merge C into B */
795         goto mergeBC;
796       }
797     }
798     if (itemp == i) break;
799   }
800   *stacksize = i;
801   PetscFunctionReturn(PETSC_SUCCESS);
802 }
803 
804 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
805  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
806  binary insertion sort or regulat insertion sort */
807 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
808 {
809   const PetscInt re = PetscMin(runstart + minrun, n - 1);
810   PetscInt       ri = runstart;
811 
812   PetscFunctionBegin;
813   if (PetscUnlikely(runstart == n - 1)) {
814     *runend = runstart;
815     PetscFunctionReturn(PETSC_SUCCESS);
816   }
817   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818   if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) {
819     ++ri;
820     while (ri < n - 1) {
821       if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break;
822       ++ri;
823     }
824     {
825       PetscInt lo = runstart, hi = ri;
826       for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size);
827     }
828   } else {
829     ++ri;
830     while (ri < n - 1) {
831       if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break;
832       ++ri;
833     }
834   }
835   if (ri < re) {
836     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
837      binary search */
838     if (ri - runstart <= minrun >> 1) {
839       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
840       PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re));
841     } else {
842       PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re));
843     }
844     *runend = re;
845   } else *runend = ri;
846   PetscFunctionReturn(PETSC_SUCCESS);
847 }
848 
849 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
850 {
851   const PetscInt re = PetscMin(runstart + minrun, n - 1);
852   PetscInt       ri = runstart;
853 
854   PetscFunctionBegin;
855   if (PetscUnlikely(runstart == n - 1)) {
856     *runend = runstart;
857     PetscFunctionReturn(PETSC_SUCCESS);
858   }
859   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
860   if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) {
861     ++ri;
862     while (ri < n - 1) {
863       if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break;
864       ++ri;
865     }
866     {
867       PetscInt lo = runstart, hi = ri;
868       for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr);
869     }
870   } else {
871     ++ri;
872     while (ri < n - 1) {
873       if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break;
874       ++ri;
875     }
876   }
877   if (ri < re) {
878     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
879      binary search */
880     if (ri - runstart <= minrun >> 1) {
881       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
882       PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re));
883     } else {
884       PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re));
885     }
886     *runend = re;
887   } else *runend = ri;
888   PetscFunctionReturn(PETSC_SUCCESS);
889 }
890 
891 /*@C
892   PetscTimSort - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm.
893 
894   Not Collective, No Fortran Support
895 
896   Input Parameters:
897 + n    - number of values
898 . arr  - array to be sorted
899 . size - size in bytes of the datatype held in arr
900 . cmp  - function pointer to comparison function
901 - ctx  - optional context to be passed to comparison function, NULL if not needed
902 
903   Output Parameter:
904 . arr - sorted array
905 
906   Level: developer
907 
908   Notes:
909   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
910   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
911   select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
912   do so it repeatedly triggers attempts throughout to merge adjacent runs.
913 
914   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
915   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
916   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
917   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
918   likely all contain similar data.
919 
920   Example Usage:
921   The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
922   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
923   may also
924   change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
925   the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
926   order
927 
928 .vb
929   int increasing_comparison_function(const void *left, const void *right, PetscCtx ctx) {
930     my_type l = *(my_type *) left, r = *(my_type *) right;
931     return (l < r) ? -1 : (l > r);
932   }
933 .ve
934 
935   Note the context is unused here but you may use it to pass and subsequently access whatever information required
936   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
937   Then pass the function
938 .vb
939   PetscTimSort(n, arr, sizeof(arr[0]), increasing_comparison_function, ctx)
940 .ve
941 
942   Fortran Notes:
943   To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts `left`, `right`, `ctx` and
944   returns `result`. For example
945 .vb
946  subroutine CompareIntegers(left,right,ctx,result)
947    implicit none
948 
949    PetscInt,intent(in) :: left, right
950    type(UserCtx)       :: ctx
951    integer,intent(out) :: result
952 
953    if (left < right) then
954      result = -1
955    else if (left == right) then
956      result = 0
957    else
958      result = 1
959    end if
960    return
961  end subroutine CompareIntegers
962 .ve
963 
964 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()`
965 @*/
966 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), PetscCtx ctx)
967 {
968   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
969   PetscTimSortStack  runstack[128];
970   PetscTimSortBuffer buff;
971   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
972    It is so unlikely that this limit is reached that this is __never__ checked for */
973 
974   PetscFunctionBegin;
975   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
976    is a power of 2 or one plus a power of 2 */
977   {
978     PetscInt t = n, r = 0;
979     /* r becomes 1 if the least significant bits contain at least one off bit */
980     while (t >= 64) {
981       r |= t & 1;
982       t >>= 1;
983     }
984     minrun = t + r;
985   }
986   if (PetscDefined(USE_DEBUG)) {
987     PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun));
988     if (n < 64) PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n));
989     else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun);
990   }
991   PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr));
992   buff.size         = (size_t)minrun * size;
993   buff.maxsize      = (size_t)n * size;
994   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
995   while (runstart < n) {
996     /* Check if additional entries are at least partially ordered and build natural run */
997     PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend));
998     runstack[stacksize].start = runstart;
999     runstack[stacksize].size  = runend - runstart + 1;
1000     PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize));
1001     ++stacksize;
1002     runstart = runend + 1;
1003   }
1004   /* Have been inside while, so discard last stacksize++ */
1005   --stacksize;
1006   PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize));
1007   PetscCall(PetscFree(buff.ptr));
1008   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1009   PetscFunctionReturn(PETSC_SUCCESS);
1010 }
1011 
1012 /*@C
1013   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm and
1014   reorders a second array to match the first. The arrays need not be the same type.
1015 
1016   Not Collective, No Fortran Support
1017 
1018   Input Parameters:
1019 + n     - number of values
1020 . asize - size in bytes of the datatype held in arr
1021 . bsize - size in bytes of the datatype held in barr
1022 . cmp   - function pointer to comparison function
1023 - ctx   - optional context to be passed to comparison function, NULL if not needed
1024 
1025   Input/Output Parameters:
1026 + arr  - array to be sorted, on output it is sorted
1027 - barr - array to be reordered, on output it is reordered
1028 
1029   Level: developer
1030 
1031   Notes:
1032   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1033   overlap.
1034 
1035   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1036   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1037   to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1038   arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1039 
1040   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1041   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1042   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1043   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1044   likely all contain similar data.
1045 
1046   Example Usage:
1047   The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
1048   between its arguments. If left < right \: return -1, if left == right \: return 0, if left > right \: return 1. The user
1049   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1050   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1051   increasing order
1052 
1053 .vb
1054   int increasing_comparison_function(const void *left, const void *right, PetscCtx ctx) {
1055     my_type l = *(my_type *) left, r = *(my_type *) right;
1056     return (l < r) ? -1 : (l > r);
1057   }
1058 .ve
1059 
1060   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1061   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1062   Then pass the function
1063 .vb
1064   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), increasing_comparison_function, ctx)
1065 .ve
1066 
1067   Fortran Notes:
1068   To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts `left`, `right`, `ctx` and
1069   returns `result`. For example
1070 .vb
1071  subroutine CompareIntegers(left,right,ctx,result)
1072    implicit none
1073 
1074    PetscInt,intent(in) :: left, right
1075    type(UserCtx)       :: ctx
1076    integer,intent(out) :: result
1077 
1078    if (left < right) then
1079      result = -1
1080    else if (left == right) then
1081      result = 0
1082    else
1083      result = 1
1084    end if
1085    return
1086  end subroutine CompareIntegers
1087 .ve
1088 
1089 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()`
1090 @*/
1091 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), PetscCtx ctx)
1092 {
1093   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1094   PetscTimSortStack  runstack[128];
1095   PetscTimSortBuffer abuff, bbuff;
1096   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1097    It is so unlikely that this limit is reached that this is __never__ checked for */
1098 
1099   PetscFunctionBegin;
1100   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1101    is a power of 2 or one plus a power of 2 */
1102   {
1103     PetscInt t = n, r = 0;
1104     /* r becomes 1 if the least significant bits contain at least one off bit */
1105     while (t >= 64) {
1106       r |= t & 1;
1107       t >>= 1;
1108     }
1109     minrun = t + r;
1110   }
1111   if (PetscDefined(USE_DEBUG)) {
1112     PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun));
1113     PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun);
1114   }
1115   PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr));
1116   abuff.size    = (size_t)minrun * asize;
1117   abuff.maxsize = (size_t)n * asize;
1118   PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr));
1119   bbuff.size        = (size_t)minrun * bsize;
1120   bbuff.maxsize     = (size_t)n * bsize;
1121   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1122   while (runstart < n) {
1123     /* Check if additional entries are at least partially ordered and build natural run */
1124     PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend));
1125     runstack[stacksize].start = runstart;
1126     runstack[stacksize].size  = runend - runstart + 1;
1127     PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize));
1128     ++stacksize;
1129     runstart = runend + 1;
1130   }
1131   /* Have been inside while, so discard last stacksize++ */
1132   --stacksize;
1133   PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize));
1134   PetscCall(PetscFree(abuff.ptr));
1135   PetscCall(PetscFree(bbuff.ptr));
1136   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*@
1141   PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order.
1142 
1143   Not Collective
1144 
1145   Input Parameters:
1146 + n   - number of values
1147 - arr - array of integers
1148 
1149   Output Parameter:
1150 . arr - sorted array of integers
1151 
1152   Level: intermediate
1153 
1154   Notes:
1155   If the array is less than 64 entries long `PetscSortInt()` is automatically used.
1156 
1157   This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is
1158   significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1159   recommended that the user benchmark their code to see which routine is fastest.
1160 
1161 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()`
1162 @*/
1163 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1164 {
1165   PetscFunctionBegin;
1166   if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1167   PetscAssertPointer(arr, 2);
1168   if (n < 64) {
1169     PetscCall(PetscSortInt(n, arr));
1170   } else {
1171     PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL));
1172   }
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
1176 /*@
1177   PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second
1178   `PetscInt` array to match the first.
1179 
1180   Not Collective
1181 
1182   Input Parameter:
1183 . n - number of values
1184 
1185   Input/Output Parameters:
1186 + arr1 - array of integers to be sorted, modified on output
1187 - arr2 - array of integers to be reordered, modified on output
1188 
1189   Level: intermediate
1190 
1191   Notes:
1192   The arrays CANNOT overlap.
1193 
1194   This function serves as an alternative to `PetscSortIntWithArray()`. While this function works for any array of integers it is
1195   significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1196   recommended that the user benchmark their code to see which routine is fastest.
1197 
1198 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()`
1199 @*/
1200 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1201 {
1202   PetscFunctionBegin;
1203   if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1204   PetscAssertPointer(arr1, 2);
1205   PetscAssertPointer(arr2, 3);
1206   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1207   PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL));
1208   PetscFunctionReturn(PETSC_SUCCESS);
1209 }
1210 
1211 /*@
1212   PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order.
1213 
1214   Not Collective
1215 
1216   Input Parameters:
1217 + n   - number of values
1218 - arr - array of `PetscMPIInt`
1219 
1220   Output Parameter:
1221 . arr - sorted array of integers
1222 
1223   Level: intermediate
1224 
1225   Notes:
1226   If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used.
1227 
1228   This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is
1229   significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1230   recommended that the user benchmark their code to see which routine is fastest.
1231 
1232 .seealso: `PetscTimSort()`, `PetscSortMPIInt()`
1233 @*/
1234 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1235 {
1236   PetscFunctionBegin;
1237   if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1238   PetscAssertPointer(arr, 2);
1239   if (n < 64) {
1240     PetscCall(PetscSortMPIInt(n, arr));
1241   } else {
1242     PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL));
1243   }
1244   PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246 
1247 /*@
1248   PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt`
1249   array to match the first.
1250 
1251   Not Collective
1252 
1253   Input Parameter:
1254 . n - number of values
1255 
1256   Input/Output Parameters:
1257 + arr1 - array of integers to be sorted, modified on output
1258 - arr2 - array of integers to be reordered, modified on output
1259 
1260   Level: intermediate
1261 
1262   Notes:
1263   The arrays CANNOT overlap.
1264 
1265   This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is
1266   significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1267   recommended that the user benchmark their code to see which routine is fastest.
1268 
1269 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()`
1270 @*/
1271 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1272 {
1273   PetscFunctionBegin;
1274   if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1275   PetscAssertPointer(arr1, 2);
1276   PetscAssertPointer(arr2, 3);
1277   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1278   PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL));
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 
1282 /*@
1283   PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order.
1284 
1285   Not Collective
1286 
1287   Input Parameters:
1288 + n   - number of values
1289 - arr - array of `PetscReal`
1290 
1291   Output Parameter:
1292 . arr - sorted array of integers
1293 
1294   Level: intermediate
1295 
1296   Notes:
1297   If the array is less than 64 entries long `PetscSortReal()` is automatically used.
1298 
1299   This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is
1300   significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1301   recommended that the user benchmark their code to see which routine is fastest.
1302 
1303 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()`
1304 @*/
1305 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1306 {
1307   PetscFunctionBegin;
1308   if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1309   PetscAssertPointer(arr, 2);
1310   if (n < 64) {
1311     PetscCall(PetscSortReal(n, arr));
1312   } else {
1313     PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL));
1314   }
1315   PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317 
1318 /*@
1319   PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second
1320   array of `PetscInt` to match the first.
1321 
1322   Not Collective
1323 
1324   Input Parameter:
1325 . n - number of values
1326 
1327   Input/Output Parameters:
1328 + arr1 - array of `PetscReal` to be sorted, modified on output
1329 - arr2 - array of `PetscInt` to be reordered, modified on output
1330 
1331   Level: intermediate
1332 
1333   Notes:
1334   This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is
1335   significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1336   recommended that the user benchmark their code to see which routine is fastest.
1337 
1338 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()`
1339 @*/
1340 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1341 {
1342   PetscFunctionBegin;
1343   if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1344   PetscAssertPointer(arr1, 2);
1345   PetscAssertPointer(arr2, 3);
1346   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1347   PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL));
1348   PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350