xref: /petsc/src/sys/utils/sortso.c (revision 86e171c2e8cb2483b83c668879cf08e5ef2d2eed)
1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ab, asize);
39   __builtin_memmove(ab, aa, asize);
40   __builtin_memcpy(aa, t, asize);
41   __builtin_memcpy(t, bb, bsize);
42   __builtin_memmove(bb, ba, bsize);
43   __builtin_memcpy(ba, t, bsize);
44   return;
45 }
46 
47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, asrc, bsize);
57   return;
58 }
59 
60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ab, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ab, aa, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(aa, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, bb, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(bb, ba, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(ba, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, asrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size)) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize)) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize)) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size)) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize)) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
543 
544 typedef struct {
545   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546   size_t size;
547   size_t maxsize;
548 } PetscTimSortBuffer;
549 
550 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551 {
552   PetscFunctionBegin;
553   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
554   {
555     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
556     PetscErrorCode ierr;
557     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
558     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
559     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
560     buff->size = newMax;
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566 {
567   PetscFunctionBegin;
568   for (;stacksize; --stacksize) {
569     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size) > 0) {
571       PetscInt       l, m = stack[stacksize].start, r;
572       PetscErrorCode ierr;
573       /* Search A for B[0] insertion */
574       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
575       /* Search B for A[-1] insertion */
576       ierr = PetscGallopSearchRight_Private(arr, size, cmp, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
577       if (m-l <= r-m) {
578         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
579         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
580       } else {
581         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
582         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
583       }
584     }
585     /* Update A with merge */
586     stack[stacksize-1].size += stack[stacksize].size;
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592 {
593   PetscFunctionBegin;
594   for (;stacksize; --stacksize) {
595     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize) > 0) {
597       PetscInt       l, m = stack[stacksize].start, r;
598       PetscErrorCode ierr;
599       /* Search A for B[0] insertion */
600       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
601       /* Search B for A[-1] insertion */
602       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
603       if (m-l <= r-m) {
604         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
605         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
606         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
607       } else {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
611       }
612     }
613     /* Update A with merge */
614     stack[stacksize-1].size += stack[stacksize].size;
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620 {
621   PetscInt       i = *stacksize;
622   PetscErrorCode ierr;
623 
624   PetscFunctionBegin;
625   while (i) {
626     PetscInt l, m, r, itemp = i;
627 
628     if (i == 1) {
629       /* A = stack[i-1], B = stack[i] */
630       if (stack[i-1].size < stack[i].size) {
631         /* if A[-1] <= B[0] then sorted */
632         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size) > 0) {
633           m = stack[i].start;
634           /* Search A for B[0] insertion */
635           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
636           /* Search B for A[-1] insertion */
637           ierr = PetscGallopSearchRight_Private(arr, size, cmp, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
638           if (m-l <= r-m) {
639             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
640             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
641           } else {
642             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
643             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
644           }
645         }
646         /* Update A with merge */
647         stack[i-1].size += stack[i].size;
648         --i;
649       }
650     } else {
651       /* i > 2, i.e. C exists
652        A = stack[i-2], B = stack[i-1], C = stack[i]; */
653       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654         if (stack[i-2].size < stack[i].size) {
655           /* merge B into A, but if A[-1] <= B[0] then already sorted */
656           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size) > 0) {
657             m = stack[i-1].start;
658             /* Search A for B[0] insertion */
659             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
660             /* Search B for A[-1] insertion */
661             ierr = PetscGallopSearchRight_Private(arr, size, cmp, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
662             if (m-l <= r-m) {
663               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
664               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
665             } else {
666               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
667               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
668             }
669           }
670           /* Update A with merge */
671           stack[i-2].size += stack[i-1].size;
672           /* Push C up the stack */
673           stack[i-1].start = stack[i].start;
674           stack[i-1].size = stack[i].size;
675         } else {
676           /* merge C into B */
677           mergeBC:
678           /* If B[-1] <= C[0] then... you know the drill */
679           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size) > 0) {
680             m = stack[i].start;
681             /* Search B for C[0] insertion */
682             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
683             /* Search C for B[-1] insertion */
684             ierr = PetscGallopSearchRight_Private(arr, size, cmp, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
685             if (m-l <= r-m) {
686               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
687               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
688             } else {
689               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
690               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, l, m, r);CHKERRQ(ierr);
691             }
692           }
693           /* Update B with merge */
694           stack[i-1].size += stack[i].size;
695         }
696         --i;
697       } else if (stack[i-1].size <= stack[i].size) {
698         /* merge C into B */
699         goto mergeBC;
700       }
701     }
702     if (itemp == i) break;
703   }
704   *stacksize = i;
705   PetscFunctionReturn(0);
706 }
707 
708 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709 {
710   PetscInt       i = *stacksize;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714   while (i) {
715     PetscInt l, m, r, itemp = i;
716 
717     if (i == 1) {
718       /* A = stack[i-1], B = stack[i] */
719       if (stack[i-1].size < stack[i].size) {
720         /* if A[-1] <= B[0] then sorted */
721         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize) > 0) {
722           m = stack[i].start;
723           /* Search A for B[0] insertion */
724           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
725           /* Search B for A[-1] insertion */
726           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
727           if (m-l <= r-m) {
728             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
729             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
730             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
731           } else {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
735           }
736         }
737         /* Update A with merge */
738         stack[i-1].size += stack[i].size;
739         --i;
740       }
741     } else {
742       /* i > 2, i.e. C exists
743        A = stack[i-2], B = stack[i-1], C = stack[i]; */
744       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745         if (stack[i-2].size < stack[i].size) {
746           /* merge B into A, but if A[-1] <= B[0] then already sorted */
747           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize) > 0) {
748             m = stack[i-1].start;
749             /* Search A for B[0] insertion */
750             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
751             /* Search B for A[-1] insertion */
752             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
753             if (m-l <= r-m) {
754               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
755               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
756               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
757             } else {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
761             }
762           }
763           /* Update A with merge */
764           stack[i-2].size += stack[i-1].size;
765           /* Push C up the stack */
766           stack[i-1].start = stack[i].start;
767           stack[i-1].size = stack[i].size;
768         } else {
769           /* merge C into B */
770           mergeBC:
771           /* If B[-1] <= C[0] then... you know the drill */
772           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize) > 0) {
773             m = stack[i].start;
774             /* Search B for C[0] insertion */
775             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
776             /* Search C for B[-1] insertion */
777             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
778             if (m-l <= r-m) {
779               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
780               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
781               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
782             } else {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, l, m, r);CHKERRQ(ierr);
786             }
787           }
788           /* Update B with merge */
789           stack[i-1].size += stack[i].size;
790         }
791         --i;
792       } else if (stack[i-1].size <= stack[i].size) {
793         /* merge C into B */
794         goto mergeBC;
795       }
796     }
797     if (itemp == i) break;
798   }
799   *stacksize = i;
800   PetscFunctionReturn(0);
801 }
802 
803 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805  binary insertion sort or regulat insertion sort */
806 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807 {
808   const PetscInt re = PetscMin(runstart+minrun, n-1);
809   PetscInt       ri = runstart;
810 
811   PetscFunctionBegin;
812   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
813   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size) < 0) {
815     ++ri;
816     while (ri < n-1) {
817       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size) >= 0) break;
818       ++ri;
819     }
820     {
821       PetscInt lo = runstart, hi = ri;
822       for (; lo < hi; ++lo, --hi) {
823         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824       }
825     }
826   } else {
827     ++ri;
828     while (ri < n-1) {
829       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size) < 0) break;
830       ++ri;
831     }
832   }
833 #if defined(PETSC_USE_DEBUG)
834   {
835     PetscErrorCode ierr;
836     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
837   }
838 #endif
839   if (ri < re) {
840     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841      binary search */
842     if (ri-runstart <= minrun >> 1) {
843       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844       PetscInsertionSort_Private(arr, tarr, size, cmp, runstart, ri, re);
845     } else {
846       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, runstart, ri, re);
847     }
848     *runend = re;
849   } else *runend = ri;
850   PetscFunctionReturn(0);
851 }
852 
853 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854 {
855   const PetscInt re = PetscMin(runstart+minrun, n-1);
856   PetscInt       ri = runstart;
857 
858   PetscFunctionBegin;
859   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
860   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize)) < 0) {
862     ++ri;
863     while (ri < n-1) {
864       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize) >= 0) break;
865       ++ri;
866     }
867     {
868       PetscInt lo = runstart, hi = ri;
869       for (; lo < hi; ++lo, --hi) {
870         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871       }
872     }
873   } else {
874     ++ri;
875     while (ri < n-1) {
876       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize) < 0) break;
877       ++ri;
878     }
879   }
880 #if defined(PETSC_USE_DEBUG)
881   {
882     PetscErrorCode ierr;
883     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
884   }
885 #endif
886   if (ri < re) {
887     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888      binary search */
889     if (ri-runstart <= minrun >> 1) {
890       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, runstart, ri, re);
892     } else {
893       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, runstart, ri, re);
894     }
895     *runend = re;
896   } else *runend = ri;
897   PetscFunctionReturn(0);
898 }
899 
900 /*
901   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
902 
903   Not Collective
904 
905   Input Parameters:
906 + n    - number of values
907 . arr  - array to be sorted
908 . size - size in bytes of the datatype held in arr
909 - cmp  - function pointer to comparison function
910 
911   Output Parameters:
912 . arr  - sorted array
913 
914   Sample usage:
915   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
916   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
917   may also
918  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
919  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
920   order:
921 
922 .vb
923   int my_increasing_comparison_function(const void *left, const void *right) {
924     my_type l = *(my_type *) left, r = *(my_type *) right;
925     return (l < r) ? -1 : (l > r);
926   }
927 .ve
928   Then pass the function
929 .vb
930   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function)
931 .ve
932 
933   Notes: Timsort makes the assumption that input data is already likely partially ordered, or that it contains
934   contiguous sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It
935   therefore aims
936  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
937  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
938 
939   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
940   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
941   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
942   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
943   likely all contain similar data.
944 
945   References:
946   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
947 
948   Level: developer
949 
950 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
951 */
952 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *))
953 {
954   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
955   PetscTimSortStack  runstack[128];
956   PetscTimSortBuffer buff;
957   PetscErrorCode     ierr;
958   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
959    It is so unlikely that this limit is reached that this is __never__ checked for */
960 
961   PetscFunctionBegin;
962   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
963    is a power of 2 or one plus a power of 2 */
964   {
965     PetscInt t = n, r = 0;
966     /* r becomes 1 if the least significant bits contain at least one off bit */
967     while (t >= 64) {
968       r |= t & 1;
969       t >>= 1;
970     }
971     minrun = t + r;
972   }
973   if (PetscUnlikelyDebug(minrun < 32 || minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
974   ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
975   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
976   buff.size = (size_t) minrun*size;
977   buff.maxsize = (size_t) n*size;
978   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
979   while (runstart < n) {
980     /* Check if additional entries are at least partially ordered and build natural run */
981     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, n, minrun, runstart, &runend);CHKERRQ(ierr);
982     runstack[stacksize].start = runstart;
983     runstack[stacksize].size = runend-runstart+1;
984     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, &buff, runstack, &stacksize);CHKERRQ(ierr);
985     ++stacksize;
986     runstart = runend+1;
987   }
988   /* Have been inside while, so discard last stacksize++ */
989   --stacksize;
990   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, &buff, runstack, stacksize);CHKERRQ(ierr);
991   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
992   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
993   PetscFunctionReturn(0);
994 }
995 
996 /*
997   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
998   reorders a second array to match the first. The arrays need not be the same type.
999 
1000   Not Collective
1001 
1002   Input Parameters:
1003 + n     - number of values
1004 . arr   - array to be sorted
1005 . asize - size in bytes of the datatype held in arr
1006 . barr  - array to be reordered
1007 . asize - size in bytes of the datatype held in barr
1008 - cmp   - function pointer to comparison function
1009 
1010   Output Parameters:
1011 + arr  - sorted array
1012 + barr - reordered array
1013 
1014   Sample usage:
1015   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1016   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1017   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1018   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1019   increasing order:
1020 
1021 .vb
1022   int my_increasing_comparison_function(const void *left, const void *right) {
1023     my_type l = *(my_type *) left, r = *(my_type *) right;
1024     return (l < r) ? -1 : (l > r);
1025   }
1026 .ve
1027   Then pass the function
1028 .vb
1029   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function)
1030 .ve
1031 
1032   Notes:
1033   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1034   overlap.
1035 
1036   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1037   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1038  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1039  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1040 
1041   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively
1042   search the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into
1043   place in bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible
1044   from these searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same
1045   size, they likely all contain similar data.
1046 
1047   References:
1048   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1049 
1050   Level: developer
1051 
1052 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1053 */
1054 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *))
1055 {
1056   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1057   PetscTimSortStack  runstack[128];
1058   PetscTimSortBuffer abuff, bbuff;
1059   PetscErrorCode     ierr;
1060   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1061    It is so unlikely that this limit is reached that this is __never__ checked for */
1062 
1063   PetscFunctionBegin;
1064   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1065    is a power of 2 or one plus a power of 2 */
1066   {
1067     PetscInt t = n, r = 0;
1068     /* r becomes 1 if the least significant bits contain at least one off bit */
1069     while (t >= 64) {
1070       r |= t & 1;
1071       t >>= 1;
1072     }
1073     minrun = t + r;
1074   }
1075   if (PetscUnlikelyDebug(minrun < 32 || minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1076   ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1077   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1078   abuff.size = (size_t) minrun*asize;
1079   abuff.maxsize = (size_t) n*asize;
1080   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1081   bbuff.size = (size_t) minrun*bsize;
1082   bbuff.maxsize = (size_t) n*bsize;
1083   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1084   while (runstart < n) {
1085     /* Check if additional entries are at least partially ordered and build natural run */
1086     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, n, minrun, runstart, &runend);CHKERRQ(ierr);
1087     runstack[stacksize].start = runstart;
1088     runstack[stacksize].size = runend-runstart+1;
1089     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1090     ++stacksize;
1091     runstart = runend+1;
1092   }
1093   /* Have been inside while, so discard last stacksize++ */
1094   --stacksize;
1095   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1096   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1097   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1098   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 /*@
1103    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1104 
1105    Not Collective
1106 
1107    Input Parameters:
1108 +  n   - number of values
1109 -  arr - array of integers
1110 
1111    Output Parameters:
1112 .  arr - sorted array of integers
1113 
1114    Notes:
1115    If the array is less than 64 entries long PetscSortInt() is automatically used.
1116 
1117    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1118    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1119    recomended that the user benchmark their code to see which routine is fastest.
1120 
1121    Level: intermediate
1122 
1123 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1124 @*/
1125 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1126 {
1127   PetscErrorCode ierr;
1128   PetscFunctionBegin;
1129   if (n <= 1) PetscFunctionReturn(0);
1130   PetscValidIntPointer(arr,2);
1131   if (n < 64) {
1132     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1133   } else {
1134     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private);CHKERRQ(ierr);
1135   }
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 /*@
1140    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1141    array to match the first.
1142 
1143    Not Collective
1144 
1145    Input Parameters:
1146 +  n   - number of values
1147 .  arr1 - array of integers to be sorted
1148 -  arr2 - array of integers to be reordered
1149 
1150    Output Parameters:
1151 +  arr1 - sorted array of integers
1152 -  arr2 - reordered array of integers
1153 
1154    Notes:
1155    The arrays CANNOT overlap.
1156 
1157    If the array to be sorted is less than 64 entries long PetscSortIntWithArray() is automatically used.
1158 
1159    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1160    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1161    recomended that the user benchmark their code to see which routine is fastest.
1162 
1163    Level: intermediate
1164 
1165 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1166 @*/
1167 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1168 {
1169   PetscErrorCode ierr;
1170   PetscFunctionBegin;
1171   PetscValidIntPointer(arr1,2);
1172   PetscValidIntPointer(arr2,3);
1173   if (n == 1) PetscFunctionReturn(0);
1174   if (n < 64) {
1175     ierr = PetscSortIntWithArray(n, arr1, arr2);CHKERRQ(ierr);
1176   } else {
1177     ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private);CHKERRQ(ierr);
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /*@
1183    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1184 
1185    Not Collective
1186 
1187    Input Parameters:
1188 +  n   - number of values
1189 -  arr - array of PetscMPIInts
1190 
1191    Output Parameters:
1192 .  arr - sorted array of integers
1193 
1194    Notes:
1195    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1196 
1197    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1198    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1199    recomended that the user benchmark their code to see which routine is fastest.
1200 
1201    Level: intermediate
1202 
1203 .seealso: PetscTimSort(), PetscSortMPIInt()
1204 @*/
1205 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1206 {
1207   PetscErrorCode  ierr;
1208 
1209   PetscFunctionBegin;
1210   PetscValidIntPointer(arr,2);
1211   if (n == 1) PetscFunctionReturn(0);
1212   if (n < 64) {
1213     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1214   } else {
1215     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private);CHKERRQ(ierr);
1216   }
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 /*@
1221    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1222    array to match the first.
1223 
1224    Not Collective
1225 
1226    Input Parameters:
1227 +  n   - number of values
1228 .  arr1 - array of integers to be sorted
1229 -  arr2 - array of integers to be reordered
1230 
1231    Output Parameters:
1232 +  arr1 - sorted array of integers
1233 -  arr2 - reordered array of integers
1234 
1235    Notes:
1236    The arrays CANNOT overlap.
1237 
1238    If the array to be sorted is less than 64 entries long PetscSortMPIIntWithArray() is automatically used.
1239 
1240    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1241    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1242    recomended that the user benchmark their code to see which routine is fastest.
1243 
1244    Level: intermediate
1245 
1246 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1247 @*/
1248 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1249 {
1250   PetscErrorCode ierr;
1251   PetscFunctionBegin;
1252   if (n <= 1) PetscFunctionReturn(0);
1253   PetscValidIntPointer(arr1,2);
1254   PetscValidIntPointer(arr2,3);
1255   if (n < 64) {
1256     ierr = PetscSortMPIIntWithArray(n, arr1, arr2);CHKERRQ(ierr);
1257   } else {
1258     ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private);CHKERRQ(ierr);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 /*@
1264    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1265 
1266    Not Collective
1267 
1268    Input Parameters:
1269 +  n   - number of values
1270 -  arr - array of PetscReals
1271 
1272    Output Parameters:
1273 .  arr - sorted array of integers
1274 
1275    Notes:
1276    If the array is less than 64 entries long PetscSortReal() is automatically used.
1277 
1278    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1279    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1280    recomended that the user benchmark their code to see which routine is fastest.
1281 
1282    Level: intermediate
1283 
1284 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1285 @*/
1286 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1287 {
1288   PetscErrorCode  ierr;
1289 
1290   PetscFunctionBegin;
1291   if (n <= 1) PetscFunctionReturn(0);
1292   PetscValidRealPointer(arr,2);
1293   if (n < 64) {
1294     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1295   } else {
1296     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 /*@
1302    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1303    array of PetscInts to match the first.
1304 
1305    Not Collective
1306 
1307    Input Parameters:
1308 +  n   - number of values
1309 .  arr1 - array of PetscReals to be sorted
1310 -  arr2 - array of PetscReals to be reordered
1311 
1312    Output Parameters:
1313 +  arr1 - sorted array of PetscReals
1314 -  arr2 - reordered array of PetscInts
1315 
1316    Notes:
1317    If the array to be sorted is less than 64 entries long PetscSortRealWithArrayInt() is automatically used.
1318 
1319    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1320    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1321    recomended that the user benchmark their code to see which routine is fastest.
1322 
1323    Level: intermediate
1324 
1325 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1326 @*/
1327 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1328 {
1329   PetscErrorCode ierr;
1330   PetscFunctionBegin;
1331   if (n <= 1) PetscFunctionReturn(0);
1332   PetscValidRealPointer(arr1,2);
1333   PetscValidIntPointer(arr2,3);
1334   if (n < 64) {
1335     ierr = PetscSortRealWithArrayInt(n, arr1, arr2);CHKERRQ(ierr);
1336   } else {
1337     ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private);CHKERRQ(ierr);
1338   }
1339   PetscFunctionReturn(0);
1340 }
1341