1 #include <petscsys.h> /*I "petscsys.h" I*/
2 #include <petsc/private/petscimpl.h>
3
Compare_PetscMPIInt_Private(const void * left,const void * right,PETSC_UNUSED PetscCtx ctx)4 static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED PetscCtx ctx)
5 {
6 PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right;
7 return (l < r) ? -1 : (l > r);
8 }
9
Compare_PetscInt_Private(const void * left,const void * right,PETSC_UNUSED PetscCtx ctx)10 static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED PetscCtx ctx)
11 {
12 PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right;
13 return (l < r) ? -1 : (l > r);
14 }
15
Compare_PetscReal_Private(const void * left,const void * right,PETSC_UNUSED PetscCtx ctx)16 static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED PetscCtx ctx)
17 {
18 PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right;
19 return (l < r) ? -1 : (l > r);
20 }
21
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__)
COPYSWAPPY(char * a,char * b,char * t,size_t size)28 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30 __builtin_memcpy(t, b, size);
31 __builtin_memmove(b, a, size);
32 __builtin_memcpy(a, t, size);
33 return;
34 }
35
COPYSWAPPY2(char * al,char * ar,size_t asize,char * bl,char * br,size_t bsize,char * t)36 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37 {
38 __builtin_memcpy(t, ar, asize);
39 __builtin_memmove(ar, al, asize);
40 __builtin_memcpy(al, t, asize);
41 __builtin_memcpy(t, br, bsize);
42 __builtin_memmove(br, bl, bsize);
43 __builtin_memcpy(bl, t, bsize);
44 return;
45 }
46
Petsc_memcpy(char * dest,const char * src,size_t size)47 static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49 __builtin_memcpy(dest, src, size);
50 return;
51 }
52
Petsc_memcpy2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)53 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55 __builtin_memcpy(adest, asrc, asize);
56 __builtin_memcpy(bdest, bsrc, bsize);
57 return;
58 }
59
Petsc_memmove(char * dest,const char * src,size_t size)60 static inline void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62 __builtin_memmove(dest, src, size);
63 return;
64 }
65
Petsc_memmove2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)66 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68 __builtin_memmove(adest, asrc, asize);
69 __builtin_memmove(bdest, bsrc, bsize);
70 return;
71 }
72 #else
COPYSWAPPY(char * a,char * b,char * t,size_t size)73 static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75 PetscFunctionBegin;
76 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size));
77 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size));
78 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size));
79 PetscFunctionReturnVoid();
80 }
81
COPYSWAPPY2(char * al,char * ar,size_t asize,char * bl,char * br,size_t bsize,char * t)82 static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
83 {
84 PetscFunctionBegin;
85 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize));
86 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize));
87 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize));
88 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize));
89 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize));
90 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize));
91 PetscFunctionReturnVoid();
92 }
93
Petsc_memcpy(char * dest,const char * src,size_t size)94 static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
95 {
96 PetscFunctionBegin;
97 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size));
98 PetscFunctionReturnVoid();
99 }
100
Petsc_memcpy2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)101 static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
102 {
103 PetscFunctionBegin;
104 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize));
105 PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize));
106 PetscFunctionReturnVoid();
107 }
108
Petsc_memmove(char * dest,const char * src,size_t size)109 static inline void Petsc_memmove(char *dest, const char *src, size_t size)
110 {
111 PetscFunctionBegin;
112 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size));
113 PetscFunctionReturnVoid();
114 }
115
Petsc_memmove2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)116 static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
117 {
118 PetscFunctionBegin;
119 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize));
120 PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize));
121 PetscFunctionReturnVoid();
122 }
123 #endif
124
125 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
126 x. Output also inclusive.
127
128 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
129
130 {0,1,2,3,4,5}
131
132 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
133 */
PetscGallopSearchLeft_Private(const char * arr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt l,PetscInt r,const char * x,PetscInt * m)134 static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
135 {
136 PetscInt last = l, k = 1, mid, cur = l + 1;
137
138 PetscFunctionBegin;
139 *m = l;
140 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l);
141 if ((*cmp)(x, arr + r * size, ctx) >= 0) {
142 *m = r;
143 PetscFunctionReturn(PETSC_SUCCESS);
144 }
145 if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS);
146 while (PETSC_TRUE) {
147 if (PetscUnlikely(cur > r)) {
148 cur = r;
149 break;
150 }
151 if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break;
152 last = cur;
153 cur += (k <<= 1) + 1;
154 ++k;
155 }
156 /* standard binary search but take last 0 mid 0 cur 1 into account*/
157 while (cur > last + 1) {
158 mid = last + ((cur - last) >> 1);
159 if ((*cmp)(x, (arr) + mid * size, ctx) < 0) {
160 cur = mid;
161 } else {
162 last = mid;
163 }
164 }
165 *m = cur;
166 PetscFunctionReturn(PETSC_SUCCESS);
167 }
168
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170 < x. Output also inclusive */
PetscGallopSearchRight_Private(const char * arr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt l,PetscInt r,const char * x,PetscInt * m)171 static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173 PetscInt last = r, k = 1, mid, cur = r - 1;
174
175 PetscFunctionBegin;
176 *m = r;
177 PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l);
178 if ((*cmp)(x, arr + l * size, ctx) <= 0) {
179 *m = l;
180 PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS);
183 while (PETSC_TRUE) {
184 if (PetscUnlikely(cur < l)) {
185 cur = l;
186 break;
187 }
188 if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break;
189 last = cur;
190 cur -= (k <<= 1) + 1;
191 ++k;
192 }
193 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
194 while (last > cur + 1) {
195 mid = last - ((last - cur) >> 1);
196 if ((*cmp)(x, (arr) + mid * size, ctx) > 0) {
197 cur = mid;
198 } else {
199 last = mid;
200 }
201 }
202 *m = cur;
203 PetscFunctionReturn(PETSC_SUCCESS);
204 }
205
206 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
207 complete array, left is first index of left array, mid is first index of right array, right is last index of right
208 array */
PetscTimSortMergeLo_Private(char * arr,char * tarr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt mid,PetscInt right)209 static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
210 {
211 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
212 const PetscInt llen = mid - left;
213
214 PetscFunctionBegin;
215 Petsc_memcpy(tarr, arr + (left * size), llen * size);
216 while ((i < llen) && (j <= right)) {
217 if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) {
218 Petsc_memcpy(arr + (k * size), tarr + (i * size), size);
219 ++k;
220 gallopright = 0;
221 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
222 PetscInt l1, l2, diff1, diff2;
223 do {
224 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
225 /* search temp for right[j], can move up to that of temp into arr immediately */
226 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1));
227 diff1 = l1 - i;
228 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
229 k += diff1;
230 i = l1;
231 /* search right for temp[i], can move up to that many of right into arr */
232 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2));
233 diff2 = l2 - j;
234 Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size);
235 k += diff2;
236 j = l2;
237 if (i >= llen || j > right) break;
238 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
239 ++MIN_GALLOP_GLOBAL;
240 }
241 } else {
242 Petsc_memmove(arr + (k * size), arr + (j * size), size);
243 ++k;
244 gallopleft = 0;
245 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
246 PetscInt l1, l2, diff1, diff2;
247 do {
248 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
249 /* search right for temp[i], can move up to that many of right into arr */
250 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2));
251 diff2 = l2 - j;
252 Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size);
253 k += diff2;
254 j = l2;
255 /* search temp for right[j], can copy up to that of temp into arr immediately */
256 PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1));
257 diff1 = l1 - i;
258 Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
259 k += diff1;
260 i = l1;
261 if (i >= llen || j > right) break;
262 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
263 ++MIN_GALLOP_GLOBAL;
264 }
265 }
266 }
267 if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size);
268 PetscFunctionReturn(PETSC_SUCCESS);
269 }
270
PetscTimSortMergeLoWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt mid,PetscInt right)271 static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
272 {
273 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
274 const PetscInt llen = mid - left;
275
276 PetscFunctionBegin;
277 Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize);
278 while ((i < llen) && (j <= right)) {
279 if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) {
280 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize);
281 ++k;
282 gallopright = 0;
283 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
284 PetscInt l1, l2, diff1, diff2;
285 do {
286 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
287 /* search temp for right[j], can move up to that of temp into arr immediately */
288 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1));
289 diff1 = l1 - i;
290 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
291 k += diff1;
292 i = l1;
293 /* search right for temp[i], can move up to that many of right into arr */
294 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2));
295 diff2 = l2 - j;
296 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
297 k += diff2;
298 j = l2;
299 if (i >= llen || j > right) break;
300 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
301 ++MIN_GALLOP_GLOBAL;
302 }
303 } else {
304 Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize);
305 ++k;
306 gallopleft = 0;
307 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
308 PetscInt l1, l2, diff1, diff2;
309 do {
310 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
311 /* search right for temp[i], can move up to that many of right into arr */
312 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2));
313 diff2 = l2 - j;
314 Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
315 k += diff2;
316 j = l2;
317 /* search temp for right[j], can copy up to that of temp into arr immediately */
318 PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1));
319 diff1 = l1 - i;
320 Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
321 k += diff1;
322 i = l1;
323 if (i >= llen || j > right) break;
324 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
325 ++MIN_GALLOP_GLOBAL;
326 }
327 }
328 }
329 if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize);
330 PetscFunctionReturn(PETSC_SUCCESS);
331 }
332
333 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
334 complete array, left is first index of left array, mid is first index of right array, right is last index of right
335 array */
PetscTimSortMergeHi_Private(char * arr,char * tarr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt mid,PetscInt right)336 static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
337 {
338 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
339 const PetscInt rlen = right - mid + 1;
340
341 PetscFunctionBegin;
342 Petsc_memcpy(tarr, (arr) + mid * size, rlen * size);
343 while ((i >= 0) && (j >= left)) {
344 if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) {
345 Petsc_memcpy((arr) + k * size, (tarr) + i * size, size);
346 --k;
347 gallopleft = 0;
348 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
349 PetscInt l1, l2, diff1, diff2;
350 do {
351 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
352 /* search temp for left[j], can copy up to that many of temp into arr */
353 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1));
354 diff1 = i - l1;
355 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
356 k -= diff1;
357 i = l1;
358 /* search left for temp[i], can move up to that many of left up arr */
359 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2));
360 diff2 = j - l2;
361 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
362 k -= diff2;
363 j = l2;
364 if (i < 0 || j < left) break;
365 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
366 ++MIN_GALLOP_GLOBAL;
367 }
368 } else {
369 Petsc_memmove((arr) + k * size, (arr) + j * size, size);
370 --k;
371 gallopright = 0;
372 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
373 PetscInt l1, l2, diff1, diff2;
374 do {
375 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
376 /* search left for temp[i], can move up to that many of left up arr */
377 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2));
378 diff2 = j - l2;
379 Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
380 k -= diff2;
381 j = l2;
382 /* search temp for left[j], can copy up to that many of temp into arr */
383 PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1));
384 diff1 = i - l1;
385 Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
386 k -= diff1;
387 i = l1;
388 if (i < 0 || j < left) break;
389 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
390 ++MIN_GALLOP_GLOBAL;
391 }
392 }
393 }
394 if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size);
395 PetscFunctionReturn(PETSC_SUCCESS);
396 }
397
PetscTimSortMergeHiWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt mid,PetscInt right)398 static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt mid, PetscInt right)
399 {
400 PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
401 const PetscInt rlen = right - mid + 1;
402
403 PetscFunctionBegin;
404 Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize);
405 while ((i >= 0) && (j >= left)) {
406 if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) {
407 Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize);
408 --k;
409 gallopleft = 0;
410 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
411 PetscInt l1, l2, diff1, diff2;
412 do {
413 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
414 /* search temp for left[j], can copy up to that many of temp into arr */
415 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1));
416 diff1 = i - l1;
417 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
418 k -= diff1;
419 i = l1;
420 /* search left for temp[i], can move up to that many of left up arr */
421 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2));
422 diff2 = j - l2;
423 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
424 k -= diff2;
425 j = l2;
426 if (i < 0 || j < left) break;
427 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
428 ++MIN_GALLOP_GLOBAL;
429 }
430 } else {
431 Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize);
432 --k;
433 gallopright = 0;
434 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
435 PetscInt l1, l2, diff1, diff2;
436 do {
437 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
438 /* search left for temp[i], can move up to that many of left up arr */
439 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2));
440 diff2 = j - l2;
441 Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
442 k -= diff2;
443 j = l2;
444 /* search temp for left[j], can copy up to that many of temp into arr */
445 PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1));
446 diff1 = i - l1;
447 Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
448 k -= diff1;
449 i = l1;
450 if (i < 0 || j < left) break;
451 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
452 ++MIN_GALLOP_GLOBAL;
453 }
454 }
455 }
456 if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize);
457 PetscFunctionReturn(PETSC_SUCCESS);
458 }
459
460 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
461 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
PetscInsertionSort_Private(char * arr,char * tarr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt start,PetscInt right)462 static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
463 {
464 PetscInt i = start == left ? start + 1 : start;
465
466 PetscFunctionBegin;
467 for (; i <= right; ++i) {
468 PetscInt j = i - 1;
469 Petsc_memcpy(tarr, arr + (i * size), size);
470 while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) {
471 COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size);
472 --j;
473 }
474 Petsc_memcpy((arr) + (j + 1) * size, tarr, size);
475 }
476 PetscFunctionReturn(PETSC_SUCCESS);
477 }
478
PetscInsertionSortWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt start,PetscInt right)479 static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
480 {
481 PetscInt i = start == left ? start + 1 : start;
482
483 PetscFunctionBegin;
484 for (; i <= right; ++i) {
485 PetscInt j = i - 1;
486 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
487 while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) {
488 COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize);
489 --j;
490 }
491 Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize);
492 }
493 PetscFunctionReturn(PETSC_SUCCESS);
494 }
495
496 /* See PetscInsertionSort_Private */
PetscBinaryInsertionSort_Private(char * arr,char * tarr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt start,PetscInt right)497 static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
498 {
499 PetscInt i = start == left ? start + 1 : start;
500
501 PetscFunctionBegin;
502 for (; i <= right; ++i) {
503 PetscInt l = left, r = i;
504 Petsc_memcpy(tarr, arr + (i * size), size);
505 do {
506 const PetscInt m = l + ((r - l) >> 1);
507 if ((*cmp)(tarr, arr + (m * size), ctx) < 0) {
508 r = m;
509 } else {
510 l = m + 1;
511 }
512 } while (l < r);
513 Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size);
514 Petsc_memcpy(arr + (l * size), tarr, size);
515 }
516 PetscFunctionReturn(PETSC_SUCCESS);
517 }
518
PetscBinaryInsertionSortWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscInt left,PetscInt start,PetscInt right)519 static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt left, PetscInt start, PetscInt right)
520 {
521 PetscInt i = start == left ? start + 1 : start;
522
523 PetscFunctionBegin;
524 for (; i <= right; ++i) {
525 PetscInt l = left, r = i;
526 Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
527 do {
528 const PetscInt m = l + ((r - l) >> 1);
529 if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) {
530 r = m;
531 } else {
532 l = m + 1;
533 }
534 } while (l < r);
535 Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize);
536 Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize);
537 }
538 PetscFunctionReturn(PETSC_SUCCESS);
539 }
540
541 typedef struct {
542 PetscInt size;
543 PetscInt start;
544 #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
545 } PetscTimSortStack;
546 #else
547 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt));
548 #endif
549
550 typedef struct {
551 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
552 size_t size;
553 size_t maxsize;
554 } PetscTimSortBuffer;
555
PetscTimSortResizeBuffer_Private(PetscTimSortBuffer * buff,size_t newSize)556 static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
557 {
558 PetscFunctionBegin;
559 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS);
560 {
561 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
562 size_t newMax = PetscMin(newSize * newSize, buff->maxsize);
563 PetscCall(PetscFree(buff->ptr));
564 PetscCall(PetscMalloc1(newMax, &buff->ptr));
565 buff->size = newMax;
566 }
567 PetscFunctionReturn(PETSC_SUCCESS);
568 }
569
PetscTimSortForceCollapse_Private(char * arr,size_t size,CompFunc cmp,PetscCtx ctx,PetscTimSortBuffer * buff,PetscTimSortStack * stack,PetscInt stacksize)570 static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
571 {
572 PetscFunctionBegin;
573 for (; stacksize; --stacksize) {
574 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
575 if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) {
576 PetscInt l, m = stack[stacksize].start, r;
577 /* Search A for B[0] insertion */
578 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l));
579 /* Search B for A[-1] insertion */
580 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r));
581 if (m - l <= r - m) {
582 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
583 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
584 } else {
585 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
586 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
587 }
588 }
589 /* Update A with merge */
590 stack[stacksize - 1].size += stack[stacksize].size;
591 }
592 PetscFunctionReturn(PETSC_SUCCESS);
593 }
594
PetscTimSortForceCollapseWithArray_Private(char * arr,size_t asize,char * barr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscTimSortBuffer * abuff,PetscTimSortBuffer * bbuff,PetscTimSortStack * stack,PetscInt stacksize)595 static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596 {
597 PetscFunctionBegin;
598 for (; stacksize; --stacksize) {
599 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600 if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) {
601 PetscInt l, m = stack[stacksize].start, r;
602 /* Search A for B[0] insertion */
603 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l));
604 /* Search B for A[-1] insertion */
605 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r));
606 if (m - l <= r - m) {
607 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
608 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
609 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
610 } else {
611 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
612 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
613 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
614 }
615 }
616 /* Update A with merge */
617 stack[stacksize - 1].size += stack[stacksize].size;
618 }
619 PetscFunctionReturn(PETSC_SUCCESS);
620 }
621
PetscTimSortMergeCollapse_Private(char * arr,size_t size,CompFunc cmp,PetscCtx ctx,PetscTimSortBuffer * buff,PetscTimSortStack * stack,PetscInt * stacksize)622 static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
623 {
624 PetscInt i = *stacksize;
625
626 PetscFunctionBegin;
627 while (i) {
628 PetscInt l, m, r, itemp = i;
629
630 if (i == 1) {
631 /* A = stack[i-1], B = stack[i] */
632 if (stack[i - 1].size < stack[i].size) {
633 /* if A[-1] <= B[0] then sorted */
634 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
635 m = stack[i].start;
636 /* Search A for B[0] insertion */
637 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l));
638 /* Search B for A[-1] insertion */
639 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r));
640 if (m - l <= r - m) {
641 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
642 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
643 } else {
644 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
645 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
646 }
647 }
648 /* Update A with merge */
649 stack[i - 1].size += stack[i].size;
650 --i;
651 }
652 } else {
653 /* i > 2, i.e. C exists
654 A = stack[i-2], B = stack[i-1], C = stack[i]; */
655 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
656 if (stack[i - 2].size < stack[i].size) {
657 /* merge B into A, but if A[-1] <= B[0] then already sorted */
658 if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) {
659 m = stack[i - 1].start;
660 /* Search A for B[0] insertion */
661 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l));
662 /* Search B for A[-1] insertion */
663 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r));
664 if (m - l <= r - m) {
665 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
666 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
667 } else {
668 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
669 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
670 }
671 }
672 /* Update A with merge */
673 stack[i - 2].size += stack[i - 1].size;
674 /* Push C up the stack */
675 stack[i - 1].start = stack[i].start;
676 stack[i - 1].size = stack[i].size;
677 } else {
678 /* merge C into B */
679 mergeBC:
680 /* If B[-1] <= C[0] then... you know the drill */
681 if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
682 m = stack[i].start;
683 /* Search B for C[0] insertion */
684 PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l));
685 /* Search C for B[-1] insertion */
686 PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r));
687 if (m - l <= r - m) {
688 PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
689 PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
690 } else {
691 PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
692 PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
693 }
694 }
695 /* Update B with merge */
696 stack[i - 1].size += stack[i].size;
697 }
698 --i;
699 } else if (stack[i - 1].size <= stack[i].size) {
700 /* merge C into B */
701 goto mergeBC;
702 }
703 }
704 if (itemp == i) break;
705 }
706 *stacksize = i;
707 PetscFunctionReturn(PETSC_SUCCESS);
708 }
709
PetscTimSortMergeCollapseWithArray_Private(char * arr,size_t asize,char * barr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscTimSortBuffer * abuff,PetscTimSortBuffer * bbuff,PetscTimSortStack * stack,PetscInt * stacksize)710 static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
711 {
712 PetscInt i = *stacksize;
713
714 PetscFunctionBegin;
715 while (i) {
716 PetscInt l, m, r, itemp = i;
717
718 if (i == 1) {
719 /* A = stack[i-1], B = stack[i] */
720 if (stack[i - 1].size < stack[i].size) {
721 /* if A[-1] <= B[0] then sorted */
722 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
723 m = stack[i].start;
724 /* Search A for B[0] insertion */
725 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l));
726 /* Search B for A[-1] insertion */
727 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r));
728 if (m - l <= r - m) {
729 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
730 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
731 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
732 } else {
733 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
734 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
735 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
736 }
737 }
738 /* Update A with merge */
739 stack[i - 1].size += stack[i].size;
740 --i;
741 }
742 } else {
743 /* i > 2, i.e. C exists
744 A = stack[i-2], B = stack[i-1], C = stack[i]; */
745 if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
746 if (stack[i - 2].size < stack[i].size) {
747 /* merge B into A, but if A[-1] <= B[0] then already sorted */
748 if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) {
749 m = stack[i - 1].start;
750 /* Search A for B[0] insertion */
751 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l));
752 /* Search B for A[-1] insertion */
753 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r));
754 if (m - l <= r - m) {
755 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
756 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
757 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
758 } else {
759 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
760 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
761 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
762 }
763 }
764 /* Update A with merge */
765 stack[i - 2].size += stack[i - 1].size;
766 /* Push C up the stack */
767 stack[i - 1].start = stack[i].start;
768 stack[i - 1].size = stack[i].size;
769 } else {
770 /* merge C into B */
771 mergeBC:
772 /* If B[-1] <= C[0] then... you know the drill */
773 if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
774 m = stack[i].start;
775 /* Search B for C[0] insertion */
776 PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l));
777 /* Search C for B[-1] insertion */
778 PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r));
779 if (m - l <= r - m) {
780 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
781 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
782 PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
783 } else {
784 PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
785 PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
786 PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
787 }
788 }
789 /* Update B with merge */
790 stack[i - 1].size += stack[i].size;
791 }
792 --i;
793 } else if (stack[i - 1].size <= stack[i].size) {
794 /* merge C into B */
795 goto mergeBC;
796 }
797 }
798 if (itemp == i) break;
799 }
800 *stacksize = i;
801 PetscFunctionReturn(PETSC_SUCCESS);
802 }
803
804 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
805 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
806 binary insertion sort or regulat insertion sort */
PetscTimSortBuildRun_Private(char * arr,char * tarr,size_t size,CompFunc cmp,PetscCtx ctx,PetscInt n,PetscInt minrun,PetscInt runstart,PetscInt * runend)807 static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, PetscCtx ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
808 {
809 const PetscInt re = PetscMin(runstart + minrun, n - 1);
810 PetscInt ri = runstart;
811
812 PetscFunctionBegin;
813 if (PetscUnlikely(runstart == n - 1)) {
814 *runend = runstart;
815 PetscFunctionReturn(PETSC_SUCCESS);
816 }
817 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) {
819 ++ri;
820 while (ri < n - 1) {
821 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break;
822 ++ri;
823 }
824 {
825 PetscInt lo = runstart, hi = ri;
826 for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size);
827 }
828 } else {
829 ++ri;
830 while (ri < n - 1) {
831 if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break;
832 ++ri;
833 }
834 }
835 if (ri < re) {
836 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
837 binary search */
838 if (ri - runstart <= minrun >> 1) {
839 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
840 PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re));
841 } else {
842 PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re));
843 }
844 *runend = re;
845 } else *runend = ri;
846 PetscFunctionReturn(PETSC_SUCCESS);
847 }
848
PetscTimSortBuildRunWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,PetscCtx ctx,PetscInt n,PetscInt minrun,PetscInt runstart,PetscInt * runend)849 static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, PetscCtx ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
850 {
851 const PetscInt re = PetscMin(runstart + minrun, n - 1);
852 PetscInt ri = runstart;
853
854 PetscFunctionBegin;
855 if (PetscUnlikely(runstart == n - 1)) {
856 *runend = runstart;
857 PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
860 if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) {
861 ++ri;
862 while (ri < n - 1) {
863 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break;
864 ++ri;
865 }
866 {
867 PetscInt lo = runstart, hi = ri;
868 for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr);
869 }
870 } else {
871 ++ri;
872 while (ri < n - 1) {
873 if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break;
874 ++ri;
875 }
876 }
877 if (ri < re) {
878 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
879 binary search */
880 if (ri - runstart <= minrun >> 1) {
881 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
882 PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re));
883 } else {
884 PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re));
885 }
886 *runend = re;
887 } else *runend = ri;
888 PetscFunctionReturn(PETSC_SUCCESS);
889 }
890
891 /*@C
892 PetscTimSort - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm.
893
894 Not Collective, No Fortran Support
895
896 Input Parameters:
897 + n - number of values
898 . arr - array to be sorted
899 . size - size in bytes of the datatype held in arr
900 . cmp - function pointer to comparison function
901 - ctx - optional context to be passed to comparison function, NULL if not needed
902
903 Output Parameter:
904 . arr - sorted array
905
906 Level: developer
907
908 Notes:
909 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
910 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
911 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
912 do so it repeatedly triggers attempts throughout to merge adjacent runs.
913
914 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
915 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
916 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
917 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
918 likely all contain similar data.
919
920 Example Usage:
921 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
922 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
923 may also
924 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
925 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
926 order
927
928 .vb
929 int increasing_comparison_function(const void *left, const void *right, PetscCtx ctx) {
930 my_type l = *(my_type *) left, r = *(my_type *) right;
931 return (l < r) ? -1 : (l > r);
932 }
933 .ve
934
935 Note the context is unused here but you may use it to pass and subsequently access whatever information required
936 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
937 Then pass the function
938 .vb
939 PetscTimSort(n, arr, sizeof(arr[0]), increasing_comparison_function, ctx)
940 .ve
941
942 Fortran Notes:
943 To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts `left`, `right`, `ctx` and
944 returns `result`. For example
945 .vb
946 subroutine CompareIntegers(left,right,ctx,result)
947 implicit none
948
949 PetscInt,intent(in) :: left, right
950 type(UserCtx) :: ctx
951 integer,intent(out) :: result
952
953 if (left < right) then
954 result = -1
955 else if (left == right) then
956 result = 0
957 else
958 result = 1
959 end if
960 return
961 end subroutine CompareIntegers
962 .ve
963
964 .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()`
965 @*/
PetscTimSort(PetscInt n,void * arr,size_t size,int (* cmp)(const void *,const void *,void *),PetscCtx ctx)966 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), PetscCtx ctx)
967 {
968 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
969 PetscTimSortStack runstack[128];
970 PetscTimSortBuffer buff;
971 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
972 It is so unlikely that this limit is reached that this is __never__ checked for */
973
974 PetscFunctionBegin;
975 /* Compute minrun. Minrun should be (32, 65) such that N/minrun
976 is a power of 2 or one plus a power of 2 */
977 {
978 PetscInt t = n, r = 0;
979 /* r becomes 1 if the least significant bits contain at least one off bit */
980 while (t >= 64) {
981 r |= t & 1;
982 t >>= 1;
983 }
984 minrun = t + r;
985 }
986 if (PetscDefined(USE_DEBUG)) {
987 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun));
988 if (n < 64) PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n));
989 else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun);
990 }
991 PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr));
992 buff.size = (size_t)minrun * size;
993 buff.maxsize = (size_t)n * size;
994 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
995 while (runstart < n) {
996 /* Check if additional entries are at least partially ordered and build natural run */
997 PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend));
998 runstack[stacksize].start = runstart;
999 runstack[stacksize].size = runend - runstart + 1;
1000 PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize));
1001 ++stacksize;
1002 runstart = runend + 1;
1003 }
1004 /* Have been inside while, so discard last stacksize++ */
1005 --stacksize;
1006 PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize));
1007 PetscCall(PetscFree(buff.ptr));
1008 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1009 PetscFunctionReturn(PETSC_SUCCESS);
1010 }
1011
1012 /*@C
1013 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm and
1014 reorders a second array to match the first. The arrays need not be the same type.
1015
1016 Not Collective, No Fortran Support
1017
1018 Input Parameters:
1019 + n - number of values
1020 . asize - size in bytes of the datatype held in arr
1021 . bsize - size in bytes of the datatype held in barr
1022 . cmp - function pointer to comparison function
1023 - ctx - optional context to be passed to comparison function, NULL if not needed
1024
1025 Input/Output Parameters:
1026 + arr - array to be sorted, on output it is sorted
1027 - barr - array to be reordered, on output it is reordered
1028
1029 Level: developer
1030
1031 Notes:
1032 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1033 overlap.
1034
1035 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1036 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1037 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1038 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1039
1040 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1041 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1042 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1043 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1044 likely all contain similar data.
1045
1046 Example Usage:
1047 The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
1048 between its arguments. If left < right \: return -1, if left == right \: return 0, if left > right \: return 1. The user
1049 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1050 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1051 increasing order
1052
1053 .vb
1054 int increasing_comparison_function(const void *left, const void *right, PetscCtx ctx) {
1055 my_type l = *(my_type *) left, r = *(my_type *) right;
1056 return (l < r) ? -1 : (l > r);
1057 }
1058 .ve
1059
1060 Note the context is unused here but you may use it to pass and subsequently access whatever information required
1061 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1062 Then pass the function
1063 .vb
1064 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), increasing_comparison_function, ctx)
1065 .ve
1066
1067 Fortran Notes:
1068 To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts `left`, `right`, `ctx` and
1069 returns `result`. For example
1070 .vb
1071 subroutine CompareIntegers(left,right,ctx,result)
1072 implicit none
1073
1074 PetscInt,intent(in) :: left, right
1075 type(UserCtx) :: ctx
1076 integer,intent(out) :: result
1077
1078 if (left < right) then
1079 result = -1
1080 else if (left == right) then
1081 result = 0
1082 else
1083 result = 1
1084 end if
1085 return
1086 end subroutine CompareIntegers
1087 .ve
1088
1089 .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()`
1090 @*/
PetscTimSortWithArray(PetscInt n,void * arr,size_t asize,void * barr,size_t bsize,int (* cmp)(const void *,const void *,void *),PetscCtx ctx)1091 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), PetscCtx ctx)
1092 {
1093 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1094 PetscTimSortStack runstack[128];
1095 PetscTimSortBuffer abuff, bbuff;
1096 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1097 It is so unlikely that this limit is reached that this is __never__ checked for */
1098
1099 PetscFunctionBegin;
1100 /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1101 is a power of 2 or one plus a power of 2 */
1102 {
1103 PetscInt t = n, r = 0;
1104 /* r becomes 1 if the least significant bits contain at least one off bit */
1105 while (t >= 64) {
1106 r |= t & 1;
1107 t >>= 1;
1108 }
1109 minrun = t + r;
1110 }
1111 if (PetscDefined(USE_DEBUG)) {
1112 PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun));
1113 PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun);
1114 }
1115 PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr));
1116 abuff.size = (size_t)minrun * asize;
1117 abuff.maxsize = (size_t)n * asize;
1118 PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr));
1119 bbuff.size = (size_t)minrun * bsize;
1120 bbuff.maxsize = (size_t)n * bsize;
1121 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1122 while (runstart < n) {
1123 /* Check if additional entries are at least partially ordered and build natural run */
1124 PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend));
1125 runstack[stacksize].start = runstart;
1126 runstack[stacksize].size = runend - runstart + 1;
1127 PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize));
1128 ++stacksize;
1129 runstart = runend + 1;
1130 }
1131 /* Have been inside while, so discard last stacksize++ */
1132 --stacksize;
1133 PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize));
1134 PetscCall(PetscFree(abuff.ptr));
1135 PetscCall(PetscFree(bbuff.ptr));
1136 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1137 PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139
1140 /*@
1141 PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order.
1142
1143 Not Collective
1144
1145 Input Parameters:
1146 + n - number of values
1147 - arr - array of integers
1148
1149 Output Parameter:
1150 . arr - sorted array of integers
1151
1152 Level: intermediate
1153
1154 Notes:
1155 If the array is less than 64 entries long `PetscSortInt()` is automatically used.
1156
1157 This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is
1158 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1159 recommended that the user benchmark their code to see which routine is fastest.
1160
1161 .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()`
1162 @*/
PetscIntSortSemiOrdered(PetscInt n,PetscInt arr[])1163 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1164 {
1165 PetscFunctionBegin;
1166 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1167 PetscAssertPointer(arr, 2);
1168 if (n < 64) {
1169 PetscCall(PetscSortInt(n, arr));
1170 } else {
1171 PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL));
1172 }
1173 PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175
1176 /*@
1177 PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second
1178 `PetscInt` array to match the first.
1179
1180 Not Collective
1181
1182 Input Parameter:
1183 . n - number of values
1184
1185 Input/Output Parameters:
1186 + arr1 - array of integers to be sorted, modified on output
1187 - arr2 - array of integers to be reordered, modified on output
1188
1189 Level: intermediate
1190
1191 Notes:
1192 The arrays CANNOT overlap.
1193
1194 This function serves as an alternative to `PetscSortIntWithArray()`. While this function works for any array of integers it is
1195 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1196 recommended that the user benchmark their code to see which routine is fastest.
1197
1198 .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()`
1199 @*/
PetscIntSortSemiOrderedWithArray(PetscInt n,PetscInt arr1[],PetscInt arr2[])1200 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1201 {
1202 PetscFunctionBegin;
1203 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1204 PetscAssertPointer(arr1, 2);
1205 PetscAssertPointer(arr2, 3);
1206 /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1207 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL));
1208 PetscFunctionReturn(PETSC_SUCCESS);
1209 }
1210
1211 /*@
1212 PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order.
1213
1214 Not Collective
1215
1216 Input Parameters:
1217 + n - number of values
1218 - arr - array of `PetscMPIInt`
1219
1220 Output Parameter:
1221 . arr - sorted array of integers
1222
1223 Level: intermediate
1224
1225 Notes:
1226 If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used.
1227
1228 This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is
1229 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1230 recommended that the user benchmark their code to see which routine is fastest.
1231
1232 .seealso: `PetscTimSort()`, `PetscSortMPIInt()`
1233 @*/
PetscMPIIntSortSemiOrdered(PetscInt n,PetscMPIInt arr[])1234 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1235 {
1236 PetscFunctionBegin;
1237 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1238 PetscAssertPointer(arr, 2);
1239 if (n < 64) {
1240 PetscCall(PetscSortMPIInt(n, arr));
1241 } else {
1242 PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL));
1243 }
1244 PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246
1247 /*@
1248 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt`
1249 array to match the first.
1250
1251 Not Collective
1252
1253 Input Parameter:
1254 . n - number of values
1255
1256 Input/Output Parameters:
1257 + arr1 - array of integers to be sorted, modified on output
1258 - arr2 - array of integers to be reordered, modified on output
1259
1260 Level: intermediate
1261
1262 Notes:
1263 The arrays CANNOT overlap.
1264
1265 This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is
1266 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1267 recommended that the user benchmark their code to see which routine is fastest.
1268
1269 .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()`
1270 @*/
PetscMPIIntSortSemiOrderedWithArray(PetscInt n,PetscMPIInt arr1[],PetscMPIInt arr2[])1271 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1272 {
1273 PetscFunctionBegin;
1274 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1275 PetscAssertPointer(arr1, 2);
1276 PetscAssertPointer(arr2, 3);
1277 /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1278 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL));
1279 PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281
1282 /*@
1283 PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order.
1284
1285 Not Collective
1286
1287 Input Parameters:
1288 + n - number of values
1289 - arr - array of `PetscReal`
1290
1291 Output Parameter:
1292 . arr - sorted array of integers
1293
1294 Level: intermediate
1295
1296 Notes:
1297 If the array is less than 64 entries long `PetscSortReal()` is automatically used.
1298
1299 This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is
1300 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1301 recommended that the user benchmark their code to see which routine is fastest.
1302
1303 .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()`
1304 @*/
PetscRealSortSemiOrdered(PetscInt n,PetscReal arr[])1305 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1306 {
1307 PetscFunctionBegin;
1308 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1309 PetscAssertPointer(arr, 2);
1310 if (n < 64) {
1311 PetscCall(PetscSortReal(n, arr));
1312 } else {
1313 PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL));
1314 }
1315 PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317
1318 /*@
1319 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second
1320 array of `PetscInt` to match the first.
1321
1322 Not Collective
1323
1324 Input Parameter:
1325 . n - number of values
1326
1327 Input/Output Parameters:
1328 + arr1 - array of `PetscReal` to be sorted, modified on output
1329 - arr2 - array of `PetscInt` to be reordered, modified on output
1330
1331 Level: intermediate
1332
1333 Notes:
1334 This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is
1335 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1336 recommended that the user benchmark their code to see which routine is fastest.
1337
1338 .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()`
1339 @*/
PetscRealSortSemiOrderedWithArrayInt(PetscInt n,PetscReal arr1[],PetscInt arr2[])1340 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1341 {
1342 PetscFunctionBegin;
1343 if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1344 PetscAssertPointer(arr1, 2);
1345 PetscAssertPointer(arr2, 3);
1346 /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1347 PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL));
1348 PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350