Line data Source code
1 : /*
2 : * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3 : * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4 : * The MIT license text is as follows:
5 : *
6 : * Permission is hereby granted, free of charge, to any person obtaining a copy
7 : * of this software and associated documentation files (the "Software"), to
8 : * deal in the Software without restriction, including without limitation the
9 : * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 : * sell copies of the Software, and to permit persons to whom the Software is
11 : * furnished to do so, subject to the following conditions:
12 : *
13 : * The above copyright notice and this permission notice shall be included in
14 : * all copies or substantial portions of the Software.
15 : *
16 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 : * IN THE SOFTWARE.
23 : */
24 :
25 : #include <errno.h>
26 : #include <string.h>
27 : #include <stdlib.h>
28 : #include <isl_sort.h>
29 :
30 : #define MID(lo, hi) (lo + ((hi - lo) >> 1))
31 :
32 : /* The code here is an optimized merge sort. Starting from a generic merge sort
33 : * the following optimizations were applied:
34 : *
35 : * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36 : * every element into a temporary buffer, blocks of elements are copied
37 : * at a time.
38 : *
39 : * o To reduce the number of memcpy() calls further, copying leading
40 : * and trailing elements into our temporary buffer is avoided, in case it is
41 : * not necessary to merge them.
42 : *
43 : * A further optimization could be to specialize memcpy calls based on the
44 : * size of the types we compare. For now, this code does not include the
45 : * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46 : * implementation. It is not clear, that the specialized version as provided in
47 : * the blog post, is really superior to the one that will be inlined by
48 : * default. So we decided to keep the code simple until this optimization was
49 : * proven to be beneficial.
50 : */
51 :
52 : static void
53 1943360013 : msort (void *array, void *buf, size_t low, size_t high, size_t size,
54 : int (* compare) (const void *, const void *, void *), void *arg)
55 : {
56 : char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
57 1943360013 : size_t copied = 0;
58 : size_t mid;
59 :
60 1943360013 : mid = MID (low, high);
61 :
62 1943360013 : if (mid + 1 < high)
63 574762215 : msort (array, buf, mid + 1, high, size, compare, arg);
64 :
65 1943360013 : if (mid > low)
66 968391248 : msort (array, buf, low, mid, size, compare, arg);
67 :
68 1943360013 : ah = ((char *) array) + ((high + 1) * size);
69 1943360013 : am = ((char *) array) + ((mid + 1) * size);
70 1943360013 : a1 = al = ((char *) array) + (low * size);
71 :
72 1943360013 : b = (char *) buf;
73 1943360013 : lo = al;
74 1943360013 : hi = am;
75 :
76 : do {
77 2124717333 : ls = lo;
78 2124717333 : hs = hi;
79 :
80 2124717333 : if (lo > al || hi > am) {
81 : /* our last loop already compared lo & hi and found lo <= hi */
82 181357320 : lo += size;
83 : }
84 :
85 7209100466 : while (lo < am && compare (lo, hi, arg) <= 0)
86 2959665800 : lo += size;
87 :
88 2124717333 : if (lo < am) {
89 569506880 : if (copied == 0) {
90 : /* avoid copying the leading items */
91 496473584 : a1 = lo;
92 496473584 : ls = lo;
93 : }
94 :
95 : /* our last compare tells us hi < lo */
96 569506880 : hi += size;
97 :
98 1404323354 : while (hi < ah && compare (hi, lo, arg) < 0)
99 265309594 : hi += size;
100 :
101 569506880 : if (lo > ls) {
102 73033296 : memcpy (b, ls, lo - ls);
103 73033296 : copied += (lo - ls);
104 73033296 : b += (lo - ls);
105 : }
106 :
107 569506880 : memcpy (b, hs, hi - hs);
108 569506880 : copied += (hi - hs);
109 569506880 : b += (hi - hs);
110 1555210453 : } else if (copied) {
111 108324024 : memcpy (b, ls, lo - ls);
112 108324024 : copied += (lo - ls);
113 108324024 : b += (lo - ls);
114 :
115 : /* copy everything we needed to re-order back into array */
116 108324024 : memcpy (a1, buf, copied);
117 108324024 : return;
118 : } else {
119 : /* everything already in order */
120 1446886429 : return;
121 : }
122 569506880 : } while (hi < ah);
123 :
124 388149560 : if (lo < am) {
125 388149560 : memcpy (b, lo, am - lo);
126 388149560 : copied += (am - lo);
127 : }
128 :
129 388149560 : memcpy (a1, buf, copied);
130 : }
131 :
132 : static int
133 415776248 : MergeSort (void *base, size_t nmemb, size_t size,
134 : int (* compare) (const void *, const void *, void *), void *arg)
135 : {
136 : void *tmp;
137 :
138 415776248 : if (nmemb < 2)
139 15569698 : return 0;
140 :
141 400206550 : if (!(tmp = malloc (nmemb * size))) {
142 0 : errno = ENOMEM;
143 0 : return -1;
144 : }
145 :
146 400206550 : msort (base, tmp, 0, nmemb - 1, size, compare, arg);
147 :
148 400206550 : free (tmp);
149 :
150 400206550 : return 0;
151 : }
152 :
153 415776248 : int isl_sort(void *const pbase, size_t total_elems, size_t size,
154 : int (*cmp)(const void *, const void *, void *arg), void *arg)
155 : {
156 415776248 : return MergeSort (pbase, total_elems, size, cmp, arg);
157 : }
|