Line data Source code
1 : /* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc.
2 : This file is part of the GNU C Library.
3 : Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
4 :
5 : The GNU C Library is free software; you can redistribute it and/or
6 : modify it under the terms of the GNU Lesser General Public
7 : License as published by the Free Software Foundation; either
8 : version 2.1 of the License, or (at your option) any later version.
9 :
10 : The GNU C Library is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 : Lesser General Public License for more details.
14 :
15 : You should have received a copy of the GNU Lesser General Public
16 : License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */
17 :
18 : /* If you consider tuning this algorithm, you should consult first:
19 : Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
20 : Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
21 :
22 : /* Modified to be used in samba4 by
23 : * Simo Sorce <idra@samba.org> 2005
24 : */
25 :
26 : #include "ldb_private.h"
27 :
28 : /* Byte-wise swap two items of size SIZE. */
29 : #define SWAP(a, b, size) \
30 : do \
31 : { \
32 : register size_t __size = (size); \
33 : register char *__a = (a), *__b = (b); \
34 : do \
35 : { \
36 : char __tmp = *__a; \
37 : *__a++ = *__b; \
38 : *__b++ = __tmp; \
39 : } while (--__size > 0); \
40 : } while (0)
41 :
42 : /* Discontinue quicksort algorithm when partition gets below this size.
43 : This particular magic number was chosen to work best on a Sun 4/260. */
44 : #define MAX_THRESH 4
45 :
46 : /* Stack node declarations used to store unfulfilled partition obligations. */
47 : typedef struct
48 : {
49 : char *lo;
50 : char *hi;
51 : } stack_node;
52 :
53 : /* The next 4 #defines implement a very fast in-line stack abstraction. */
54 : /* The stack needs log (total_elements) entries (we could even subtract
55 : log(MAX_THRESH)). Since total_elements has type size_t, we get as
56 : upper bound for log (total_elements):
57 : bits per byte (CHAR_BIT) * sizeof(size_t). */
58 : #ifndef CHAR_BIT
59 : #define CHAR_BIT 8
60 : #endif
61 : #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
62 : #define PUSH(low, high) ((void) ((stack[i].lo = (low)), (stack[i].hi = (high)), i++))
63 : #define POP(low, high) ((void) (i--, (low = stack[i].lo), (high = stack[i].hi)))
64 :
65 :
66 : /* Order size using quicksort. This implementation incorporates
67 : four optimizations discussed in Sedgewick:
68 :
69 : 1. Non-recursive, using an explicit stack of pointer that store the
70 : next array partition to sort. To save time, this maximum amount
71 : of space required to store an array of SIZE_MAX is allocated on the
72 : stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
73 : only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
74 : Pretty cheap, actually.
75 :
76 : 2. Chose the pivot element using a median-of-three decision tree.
77 : This reduces the probability of selecting a bad pivot value and
78 : eliminates certain extraneous comparisons.
79 :
80 : 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
81 : insertion sort to order the MAX_THRESH items within each partition.
82 : This is a big win, since insertion sort is faster for small, mostly
83 : sorted array segments.
84 :
85 : 4. The larger of the two sub-partitions is always pushed onto the
86 : stack first, with the algorithm then concentrating on the
87 : smaller partition. This *guarantees* no more than log (total_elems)
88 : stack size is needed (actually O(1) in this case)! */
89 :
90 1033104 : void ldb_qsort (void *const pbase, size_t total_elems, size_t size,
91 : void *opaque, ldb_qsort_cmp_fn_t cmp)
92 : {
93 1033104 : register char *base_ptr = (char *) pbase;
94 :
95 1033104 : const size_t max_thresh = MAX_THRESH * size;
96 :
97 1033104 : if (total_elems == 0)
98 : /* Avoid lossage with unsigned arithmetic below. */
99 0 : return;
100 :
101 1033104 : if (total_elems > MAX_THRESH)
102 : {
103 1031387 : char *lo = base_ptr;
104 1031387 : char *hi = &lo[size * (total_elems - 1)];
105 83637 : stack_node stack[STACK_SIZE];
106 1031387 : size_t i = 0;
107 :
108 1031387 : PUSH (NULL, NULL);
109 :
110 411210 : do
111 : {
112 411210 : char *left_ptr;
113 411210 : char *right_ptr;
114 :
115 : /* Select median value from among LO, MID, and HI. Rearrange
116 : LO and HI so the three values are sorted. This lowers the
117 : probability of picking a pathological pivot value and
118 : skips a comparison for both the LEFT_PTR and RIGHT_PTR in
119 : the while loops. */
120 :
121 5286708 : char *mid = lo + size * ((hi - lo) / size >> 1);
122 :
123 5286708 : if ((*cmp) ((void *) mid, (void *) lo, opaque) < 0)
124 68129180 : SWAP (mid, lo, size);
125 5286708 : if ((*cmp) ((void *) hi, (void *) mid, opaque) < 0)
126 109565364 : SWAP (mid, hi, size);
127 : else
128 1741387 : goto jump_over;
129 3545321 : if ((*cmp) ((void *) mid, (void *) lo, opaque) < 0)
130 31794040 : SWAP (mid, lo, size);
131 3545321 : jump_over:;
132 :
133 5286708 : left_ptr = lo + size;
134 5286708 : right_ptr = hi - size;
135 :
136 : /* Here's the famous ``collapse the walls'' section of quicksort.
137 : Gotta like those tight inner loops! They are the main reason
138 : that this algorithm runs much faster than others. */
139 : do
140 : {
141 43476350 : while ((*cmp) ((void *) left_ptr, (void *) mid, opaque) < 0)
142 22462444 : left_ptr += size;
143 :
144 39138915 : while ((*cmp) ((void *) mid, (void *) right_ptr, opaque) < 0)
145 18125009 : right_ptr -= size;
146 :
147 21013906 : if (left_ptr < right_ptr)
148 : {
149 502662624 : SWAP (left_ptr, right_ptr, size);
150 17111952 : if (mid == left_ptr)
151 2698850 : mid = right_ptr;
152 14220397 : else if (mid == right_ptr)
153 1382869 : mid = left_ptr;
154 17111952 : left_ptr += size;
155 17111952 : right_ptr -= size;
156 : }
157 3901954 : else if (left_ptr == right_ptr)
158 : {
159 1015098 : left_ptr += size;
160 1015098 : right_ptr -= size;
161 1015098 : break;
162 : }
163 : }
164 19998808 : while (left_ptr <= right_ptr);
165 :
166 : /* Set up pointers for next iteration. First determine whether
167 : left and right partitions are below the threshold size. If so,
168 : ignore one or both. Otherwise, push the larger partition's
169 : bounds on the stack and continue sorting the smaller one. */
170 :
171 5286708 : if ((size_t) (right_ptr - lo) <= max_thresh)
172 : {
173 2902531 : if ((size_t) (hi - left_ptr) <= max_thresh)
174 : /* Ignore both small partitions. */
175 2162370 : POP (lo, hi);
176 : else
177 : /* Ignore small left partition. */
178 669404 : lo = left_ptr;
179 : }
180 2384177 : else if ((size_t) (hi - left_ptr) <= max_thresh)
181 : /* Ignore small right partition. */
182 1160850 : hi = right_ptr;
183 1130983 : else if ((right_ptr - lo) > (hi - left_ptr))
184 : {
185 : /* Push larger left partition indices. */
186 667269 : PUSH (lo, right_ptr);
187 667269 : lo = left_ptr;
188 : }
189 : else
190 : {
191 : /* Push larger right partition indices. */
192 463714 : PUSH (left_ptr, hi);
193 463714 : hi = right_ptr;
194 : }
195 : }
196 5286708 : while (i > 0 && i < STACK_SIZE);
197 : }
198 :
199 : /* Once the BASE_PTR array is partially sorted by quicksort the rest
200 : is completely sorted using insertion sort, since this is efficient
201 : for partitions below MAX_THRESH size. BASE_PTR points to the beginning
202 : of the array to sort, and END_PTR points at the very last element in
203 : the array (*not* one beyond it!). */
204 :
205 : #define min(x, y) ((x) < (y) ? (x) : (y))
206 :
207 : {
208 1033104 : char *const end_ptr = &base_ptr[size * (total_elems - 1)];
209 1033104 : char *tmp_ptr = base_ptr;
210 1033104 : char *thresh = min(end_ptr, base_ptr + max_thresh);
211 83640 : register char *run_ptr;
212 :
213 : /* Find smallest element in first threshold and place it at the
214 : array's beginning. This is the smallest array element,
215 : and the operation speeds up insertion sort's inner loop. */
216 :
217 5162188 : for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
218 4129084 : if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, opaque) < 0)
219 358414 : tmp_ptr = run_ptr;
220 :
221 1033104 : if (tmp_ptr != base_ptr)
222 7591744 : SWAP (tmp_ptr, base_ptr, size);
223 :
224 : /* Insertion sort, running from left-hand-side up to right-hand-side. */
225 :
226 949464 : run_ptr = base_ptr + size;
227 22547501 : while ((run_ptr += size) <= end_ptr)
228 : {
229 21514397 : tmp_ptr = run_ptr - size;
230 33672662 : while (tmp_ptr > base_ptr && (*cmp) ((void *) run_ptr, (void *) tmp_ptr, opaque) < 0)
231 12158265 : tmp_ptr -= size;
232 :
233 21514397 : tmp_ptr += size;
234 21514397 : if (tmp_ptr != run_ptr)
235 : {
236 619886 : char *trav;
237 :
238 8300294 : trav = run_ptr + size;
239 265763522 : while (--trav >= run_ptr)
240 : {
241 257463228 : char c = *trav;
242 19122228 : char *hi, *lo;
243 :
244 634290508 : for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
245 376827280 : *hi = *lo;
246 257463228 : *hi = c;
247 : }
248 : }
249 : }
250 : }
251 : }
|