Line data Source code
1 : /*
2 : * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3 : * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4 : * The MIT license text is as follows:
5 : *
6 : * Permission is hereby granted, free of charge, to any person obtaining a copy
7 : * of this software and associated documentation files (the "Software"), to
8 : * deal in the Software without restriction, including without limitation the
9 : * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 : * sell copies of the Software, and to permit persons to whom the Software is
11 : * furnished to do so, subject to the following conditions:
12 : *
13 : * The above copyright notice and this permission notice shall be included in
14 : * all copies or substantial portions of the Software.
15 : *
16 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 : * IN THE SOFTWARE.
23 : */
24 :
25 : #include <errno.h>
26 : #include <string.h>
27 : #include <stdlib.h>
28 : #include <isl_sort.h>
29 :
30 : #define MID(lo, hi) (lo + ((hi - lo) >> 1))
31 :
32 : /* The code here is an optimized merge sort. Starting from a generic merge sort
33 : * the following optimizations were applied:
34 : *
35 : * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36 : * every element into a temporary buffer, blocks of elements are copied
37 : * at a time.
38 : *
39 : * o To reduce the number of memcpy() calls further, copying leading
40 : * and trailing elements into our temporary buffer is avoided, in case it is
41 : * not necessary to merge them.
42 : *
43 : * A further optimization could be to specialize memcpy calls based on the
44 : * size of the types we compare. For now, this code does not include the
45 : * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46 : * implementation. It is not clear, that the specialized version as provided in
47 : * the blog post, is really superior to the one that will be inlined by
48 : * default. So we decided to keep the code simple until this optimization was
49 : * proven to be beneficial.
50 : */
51 :
52 : static void
53 584151708 : msort (void *array, void *buf, size_t low, size_t high, size_t size,
54 : int (* compare) (const void *, const void *, void *), void *arg)
55 : {
56 : char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
57 584151708 : size_t copied = 0;
58 : size_t mid;
59 :
60 584151708 : mid = MID (low, high);
61 :
62 584151708 : if (mid + 1 < high)
63 193598468 : msort (array, buf, mid + 1, high, size, compare, arg);
64 :
65 584151708 : if (mid > low)
66 303178328 : msort (array, buf, low, mid, size, compare, arg);
67 :
68 584151708 : ah = ((char *) array) + ((high + 1) * size);
69 584151708 : am = ((char *) array) + ((mid + 1) * size);
70 584151708 : a1 = al = ((char *) array) + (low * size);
71 :
72 584151708 : b = (char *) buf;
73 584151708 : lo = al;
74 584151708 : hi = am;
75 :
76 : do {
77 637541360 : ls = lo;
78 637541360 : hs = hi;
79 :
80 637541360 : if (lo > al || hi > am) {
81 : /* our last loop already compared lo & hi and found lo <= hi */
82 53389652 : lo += size;
83 : }
84 :
85 2304017067 : while (lo < am && compare (lo, hi, arg) <= 0)
86 1028934347 : lo += size;
87 :
88 637541360 : if (lo < am) {
89 169309325 : if (copied == 0) {
90 : /* avoid copying the leading items */
91 145985234 : a1 = lo;
92 145985234 : ls = lo;
93 : }
94 :
95 : /* our last compare tells us hi < lo */
96 169309325 : hi += size;
97 :
98 460593268 : while (hi < ah && compare (hi, lo, arg) < 0)
99 121974618 : hi += size;
100 :
101 169309325 : if (lo > ls) {
102 23324091 : memcpy (b, ls, lo - ls);
103 23324091 : copied += (lo - ls);
104 23324091 : b += (lo - ls);
105 : }
106 :
107 169309325 : memcpy (b, hs, hi - hs);
108 169309325 : copied += (hi - hs);
109 169309325 : b += (hi - hs);
110 468232035 : } else if (copied) {
111 30065561 : memcpy (b, ls, lo - ls);
112 30065561 : copied += (lo - ls);
113 30065561 : b += (lo - ls);
114 :
115 : /* copy everything we needed to re-order back into array */
116 30065561 : memcpy (a1, buf, copied);
117 30065561 : return;
118 : } else {
119 : /* everything already in order */
120 438166474 : return;
121 : }
122 169309325 : } while (hi < ah);
123 :
124 115919673 : if (lo < am) {
125 115919673 : memcpy (b, lo, am - lo);
126 115919673 : copied += (am - lo);
127 : }
128 :
129 115919673 : memcpy (a1, buf, copied);
130 : }
131 :
132 : static int
133 93656850 : MergeSort (void *base, size_t nmemb, size_t size,
134 : int (* compare) (const void *, const void *, void *), void *arg)
135 : {
136 : void *tmp;
137 :
138 93656850 : if (nmemb < 2)
139 6281938 : return 0;
140 :
141 87374912 : if (!(tmp = malloc (nmemb * size))) {
142 0 : errno = ENOMEM;
143 0 : return -1;
144 : }
145 :
146 87374912 : msort (base, tmp, 0, nmemb - 1, size, compare, arg);
147 :
148 87374912 : free (tmp);
149 :
150 87374912 : return 0;
151 : }
152 :
153 93656850 : int isl_sort(void *const pbase, size_t total_elems, size_t size,
154 : int (*cmp)(const void *, const void *, void *arg), void *arg)
155 : {
156 93656850 : return MergeSort (pbase, total_elems, size, cmp, arg);
157 : }
|