Line data Source code
1 : /*
2 : * Copyright (C) Matthieu Suiche 2008
3 : *
4 : * All rights reserved.
5 : *
6 : * Redistribution and use in source and binary forms, with or without
7 : * modification, are permitted provided that the following conditions
8 : * are met:
9 : *
10 : * 1. Redistributions of source code must retain the above copyright
11 : * notice, this list of conditions and the following disclaimer.
12 : *
13 : * 2. Redistributions in binary form must reproduce the above copyright
14 : * notice, this list of conditions and the following disclaimer in the
15 : * documentation and/or other materials provided with the distribution.
16 : *
17 : * 3. Neither the name of the author nor the names of its contributors
18 : * may be used to endorse or promote products derived from this software
19 : * without specific prior written permission.
20 : *
21 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 : * SUCH DAMAGE.
32 : *
33 : */
34 :
35 : #include "replace.h"
36 : #include "lzxpress.h"
37 : #include "../lib/util/byteorder.h"
38 :
39 :
40 : #define __CHECK_BYTES(__size, __index, __needed) do { \
41 : if (unlikely(__index >= __size)) { \
42 : return -1; \
43 : } else { \
44 : uint32_t __avail = __size - __index; \
45 : if (unlikely(__needed > __avail)) { \
46 : return -1; \
47 : } \
48 : } \
49 : } while(0)
50 :
51 :
52 : /*
53 : * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
54 : * matches will be.
55 : *
56 : * The window in which we look for matches is 8192 bytes. That means with
57 : * random data a value of 13 is getting close to no collisions, while a 12
58 : * will miss about half the possible matches. With compressible data there
59 : * will generally be fewer and less diverse entries, so collisions are rarer.
60 : *
61 : * In the testsuite, bith 12 and 13 give better compression than Windows, but
62 : * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
63 : */
64 : #define LZX_PLAIN_COMP_HASH_BITS 12
65 : /*
66 : * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
67 : * circular hash table for a match, before we give up. A bigger number will
68 : * generally lead to better but slower compression, but a stupidly big number
69 : * will just be worse.
70 : */
71 : #define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5
72 : #define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1)
73 :
74 22635896 : static inline uint16_t three_byte_hash(const uint8_t *bytes)
75 : {
76 22635896 : uint16_t a = bytes[0];
77 22635896 : uint16_t b = bytes[1] ^ 0x2e;
78 22635896 : uint16_t c = bytes[2] ^ 0x55;
79 22635896 : uint16_t ca = c - a;
80 22635896 : uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a);
81 22635896 : return d & HASH_MASK;
82 : }
83 :
84 :
85 22635896 : static inline void store_match(uint32_t *hash_table,
86 : uint16_t h,
87 : uint32_t offset)
88 : {
89 22635896 : int i;
90 22635896 : uint32_t o = hash_table[h];
91 22635896 : uint16_t h2;
92 22635896 : uint16_t worst_h;
93 22635896 : int worst_score;
94 :
95 22635896 : if (o >= offset) {
96 : /* there is nothing there yet */
97 75989 : hash_table[h] = offset;
98 75989 : return;
99 : }
100 112598497 : for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
101 90108091 : h2 = (h + i) & HASH_MASK;
102 90108091 : if (hash_table[h2] >= offset) {
103 69501 : hash_table[h2] = offset;
104 69501 : return;
105 : }
106 : }
107 : /*
108 : * There are no slots, but we really want to store this, so we'll kick
109 : * out the one with the longest distance.
110 : */
111 22490406 : worst_h = h;
112 22490406 : worst_score = offset - o;
113 112452030 : for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
114 89961624 : int score;
115 89961624 : h2 = (h + i) & HASH_MASK;
116 89961624 : o = hash_table[h2];
117 89961624 : score = offset - o;
118 89961624 : if (score > worst_score) {
119 29063751 : worst_score = score;
120 29063751 : worst_h = h2;
121 : }
122 : }
123 22490406 : hash_table[worst_h] = offset;
124 : }
125 :
126 :
127 : struct match {
128 : const uint8_t *there;
129 : uint32_t length;
130 : };
131 :
132 :
133 22635896 : static inline struct match lookup_match(uint32_t *hash_table,
134 : uint16_t h,
135 : const uint8_t *data,
136 : uint32_t offset,
137 : size_t max_len)
138 : {
139 22635896 : int i;
140 22635896 : uint32_t o;
141 22635896 : uint16_t h2;
142 22635896 : size_t len;
143 22635896 : const uint8_t *there = NULL;
144 22635896 : const uint8_t *here = data + offset;
145 22635896 : struct match best = {0};
146 :
147 135234393 : for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
148 112743987 : h2 = (h + i) & HASH_MASK;
149 112743987 : o = hash_table[h2];
150 112743987 : if (o >= offset) {
151 : /*
152 : * Either this is 0xffffffff, or something is really
153 : * wrong.
154 : *
155 : * In setting this, we would never have stepped over
156 : * an 0xffffffff, so we won't now.
157 : */
158 0 : break;
159 : }
160 112598497 : if (offset - o > 8192) {
161 : /* Too far away to use */
162 4184082 : continue;
163 : }
164 108414415 : there = data + o;
165 : /*
166 : * When we already have a long match, we can try to avoid
167 : * measuring out another long, but shorter match.
168 : */
169 108414415 : if (best.length > 1000 &&
170 345 : there[best.length - 1] != best.there[best.length - 1]) {
171 153 : continue;
172 : }
173 :
174 0 : for (len = 0;
175 199241968 : len < max_len && here[len] == there[len];
176 90827706 : len++) {
177 : /* counting */
178 90827706 : }
179 108414262 : if (len > 2) {
180 3976197 : if (len > best.length) {
181 2019135 : best.length = len;
182 2019135 : best.there = there;
183 : }
184 : }
185 : }
186 22635896 : return best;
187 : }
188 :
189 : struct write_context {
190 : uint8_t *compressed;
191 : uint32_t compressed_pos;
192 : uint32_t max_compressed_size;
193 : uint32_t indic;
194 : uint32_t indic_bit;
195 : uint32_t indic_pos;
196 : uint32_t nibble_index;
197 : };
198 :
199 :
200 : #define CHECK_INPUT_BYTES(__needed) \
201 : __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
202 : #define CHECK_OUTPUT_BYTES(__needed) \
203 : __CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
204 :
205 :
206 22635950 : static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit)
207 : {
208 22635950 : wc->indic = (wc->indic << 1) | bit;
209 22635950 : wc->indic_bit += 1;
210 :
211 22635950 : if (wc->indic_bit == 32) {
212 707349 : PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
213 707349 : wc->indic_bit = 0;
214 707349 : CHECK_OUTPUT_BYTES(sizeof(uint32_t));
215 707349 : wc->indic_pos = wc->compressed_pos;
216 707349 : wc->compressed_pos += sizeof(uint32_t);
217 : }
218 22635950 : return wc->indic_pos;
219 : }
220 :
221 :
222 1709716 : static ssize_t encode_match(struct write_context *wc,
223 : struct match match,
224 : const uint8_t *here)
225 : {
226 1709716 : uint32_t match_len = match.length - 3;
227 1709716 : uint32_t best_offset = here - match.there - 1;
228 1709716 : uint16_t metadata;
229 :
230 1709716 : if (best_offset > 8191) {
231 0 : return -1;
232 : }
233 :
234 1709716 : CHECK_OUTPUT_BYTES(sizeof(uint16_t));
235 1709716 : metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
236 1709716 : PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
237 1709716 : wc->compressed_pos += sizeof(uint16_t);
238 :
239 1709716 : if (match_len >= 7) {
240 60016 : match_len -= 7;
241 :
242 60016 : if (wc->nibble_index == 0) {
243 30026 : wc->nibble_index = wc->compressed_pos;
244 :
245 30026 : CHECK_OUTPUT_BYTES(sizeof(uint8_t));
246 30026 : wc->compressed[wc->nibble_index] = MIN(match_len, 15);
247 30026 : wc->compressed_pos += sizeof(uint8_t);
248 : } else {
249 29990 : wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
250 29990 : wc->nibble_index = 0;
251 : }
252 :
253 60016 : if (match_len >= 15) {
254 5331 : match_len -= 15;
255 :
256 5331 : CHECK_OUTPUT_BYTES(sizeof(uint8_t));
257 5331 : wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
258 5331 : wc->compressed_pos += sizeof(uint8_t);
259 :
260 5331 : if (match_len >= 255) {
261 : /* Additional match_len */
262 :
263 2240 : match_len += 7 + 15;
264 :
265 2240 : if (match_len < (1 << 16)) {
266 2240 : CHECK_OUTPUT_BYTES(sizeof(uint16_t));
267 2240 : PUSH_LE_U16(wc->compressed, wc->compressed_pos,
268 : match_len);
269 2240 : wc->compressed_pos += sizeof(uint16_t);
270 : } else {
271 0 : CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
272 : sizeof(uint32_t));
273 0 : PUSH_LE_U16(wc->compressed,
274 : wc->compressed_pos, 0);
275 0 : wc->compressed_pos += sizeof(uint16_t);
276 :
277 0 : PUSH_LE_U32(wc->compressed,
278 : wc->compressed_pos,
279 : match_len);
280 0 : wc->compressed_pos += sizeof(uint32_t);
281 : }
282 : }
283 : }
284 : }
285 1709716 : return push_indicator_bit(wc, 1);
286 : }
287 :
288 : #undef CHECK_OUTPUT_BYTES
289 : #define CHECK_OUTPUT_BYTES(__needed) \
290 : __CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)
291 :
292 :
293 74 : ssize_t lzxpress_compress(const uint8_t *uncompressed,
294 : uint32_t uncompressed_size,
295 : uint8_t *compressed,
296 : uint32_t max_compressed_size)
297 : {
298 : /*
299 : * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
300 : *
301 : * It avoids Huffman encoding by including literal bytes inline when a
302 : * match is not found. Every so often it includes a uint32 bit map
303 : * flagging which positions contain matches and which contain
304 : * literals. The encoding of matches is of variable size, depending on
305 : * the match length; they are always at least 16 bits long, and can
306 : * implicitly use unused half-bytes from earlier in the stream.
307 : */
308 74 : ssize_t ret;
309 74 : uint32_t uncompressed_pos;
310 74 : struct write_context wc = {
311 : .indic = 0,
312 : .indic_pos = 0,
313 : .indic_bit = 0,
314 : .nibble_index = 0,
315 : .compressed = compressed,
316 : .compressed_pos = 0,
317 : .max_compressed_size = max_compressed_size
318 : };
319 74 : uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS];
320 74 : memset(hash_table, 0xff, sizeof(hash_table));
321 :
322 74 : if (!uncompressed_size) {
323 0 : return 0;
324 : }
325 :
326 72 : uncompressed_pos = 0;
327 72 : CHECK_OUTPUT_BYTES(sizeof(uint32_t));
328 72 : PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
329 72 : wc.compressed_pos += sizeof(uint32_t);
330 :
331 22636022 : while ((uncompressed_pos < uncompressed_size) &&
332 22635950 : (wc.compressed_pos < wc.max_compressed_size)) {
333 :
334 : /* maximum len we can encode into metadata */
335 22635950 : const uint32_t max_len = MIN(0xFFFF + 3,
336 : uncompressed_size - uncompressed_pos);
337 22635950 : const uint8_t *here = uncompressed + uncompressed_pos;
338 22635950 : uint16_t h;
339 22635950 : struct match match = {0};
340 :
341 22635950 : if (max_len >= 3) {
342 22635896 : h = three_byte_hash(here);
343 22635896 : match = lookup_match(hash_table,
344 : h,
345 : uncompressed,
346 : uncompressed_pos,
347 : max_len);
348 :
349 22635896 : store_match(hash_table, h, uncompressed_pos);
350 : } else {
351 0 : match.there = NULL;
352 0 : match.length = 0;
353 : }
354 :
355 22635950 : if (match.there == NULL) {
356 : /*
357 : * This is going to be a literal byte, which we flag
358 : * by setting a bit in an indicator field somewhere
359 : * earlier in the stream.
360 : */
361 0 : CHECK_INPUT_BYTES(sizeof(uint8_t));
362 20926234 : CHECK_OUTPUT_BYTES(sizeof(uint8_t));
363 20926234 : wc.compressed[wc.compressed_pos++] = *here;
364 20926234 : uncompressed_pos++;
365 :
366 20926234 : ret = push_indicator_bit(&wc, 0);
367 20926234 : if (ret < 0) {
368 0 : return ret;
369 : }
370 : } else {
371 1709716 : ret = encode_match(&wc, match, here);
372 1709716 : if (ret < 0) {
373 0 : return ret;
374 : }
375 1709716 : uncompressed_pos += match.length;
376 : }
377 : }
378 :
379 72 : if (wc.indic_bit != 0) {
380 66 : wc.indic <<= 32 - wc.indic_bit;
381 : }
382 72 : wc.indic |= UINT32_MAX >> wc.indic_bit;
383 72 : PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
384 :
385 72 : return wc.compressed_pos;
386 : }
387 :
388 157 : ssize_t lzxpress_decompress(const uint8_t *input,
389 : uint32_t input_size,
390 : uint8_t *output,
391 : uint32_t max_output_size)
392 : {
393 : /*
394 : * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
395 : * Algorithm Details".
396 : */
397 157 : uint32_t output_index, input_index;
398 157 : uint32_t indicator, indicator_bit;
399 157 : uint32_t nibble_index;
400 :
401 157 : if (input_size == 0) {
402 0 : return 0;
403 : }
404 :
405 0 : output_index = 0;
406 0 : input_index = 0;
407 0 : indicator = 0;
408 0 : indicator_bit = 0;
409 0 : nibble_index = 0;
410 :
411 : #undef CHECK_INPUT_BYTES
412 : #define CHECK_INPUT_BYTES(__needed) \
413 : __CHECK_BYTES(input_size, input_index, __needed)
414 : #undef CHECK_OUTPUT_BYTES
415 : #define CHECK_OUTPUT_BYTES(__needed) \
416 : __CHECK_BYTES(max_output_size, output_index, __needed)
417 :
418 24880070 : do {
419 24880070 : if (indicator_bit == 0) {
420 777595 : CHECK_INPUT_BYTES(sizeof(uint32_t));
421 777595 : indicator = PULL_LE_U32(input, input_index);
422 777595 : input_index += sizeof(uint32_t);
423 777595 : if (input_index == input_size) {
424 : /*
425 : * The compressor left room for indicator
426 : * flags for data that doesn't exist.
427 : */
428 0 : break;
429 : }
430 0 : indicator_bit = 32;
431 : }
432 24880068 : indicator_bit--;
433 :
434 : /*
435 : * check whether the bit specified by indicator_bit is set or not
436 : * set in indicator. For example, if indicator_bit has value 4
437 : * check whether the 4th bit of the value in indicator is set
438 : */
439 24880068 : if (((indicator >> indicator_bit) & 1) == 0) {
440 22805507 : CHECK_INPUT_BYTES(sizeof(uint8_t));
441 22805507 : CHECK_OUTPUT_BYTES(sizeof(uint8_t));
442 22805507 : output[output_index] = input[input_index];
443 22805507 : input_index += sizeof(uint8_t);
444 22805507 : output_index += sizeof(uint8_t);
445 : } else {
446 2074561 : uint32_t length;
447 2074561 : uint32_t offset;
448 :
449 2074561 : CHECK_INPUT_BYTES(sizeof(uint16_t));
450 2074561 : length = PULL_LE_U16(input, input_index);
451 2074561 : input_index += sizeof(uint16_t);
452 2074561 : offset = (length >> 3) + 1;
453 2074561 : length &= 7;
454 :
455 2074561 : if (length == 7) {
456 80507 : if (nibble_index == 0) {
457 40297 : CHECK_INPUT_BYTES(sizeof(uint8_t));
458 40297 : nibble_index = input_index;
459 40297 : length = input[input_index] & 0xf;
460 40297 : input_index += sizeof(uint8_t);
461 : } else {
462 40210 : length = input[nibble_index] >> 4;
463 40210 : nibble_index = 0;
464 : }
465 :
466 80507 : if (length == 15) {
467 9057 : CHECK_INPUT_BYTES(sizeof(uint8_t));
468 9057 : length = input[input_index];
469 9057 : input_index += sizeof(uint8_t);
470 9057 : if (length == 255) {
471 2754 : CHECK_INPUT_BYTES(sizeof(uint16_t));
472 2754 : length = PULL_LE_U16(input, input_index);
473 2754 : input_index += sizeof(uint16_t);
474 2754 : if (length == 0) {
475 4 : CHECK_INPUT_BYTES(sizeof(uint32_t));
476 4 : length = PULL_LE_U32(input, input_index);
477 4 : input_index += sizeof(uint32_t);
478 : }
479 :
480 2754 : if (length < (15 + 7)) {
481 0 : return -1;
482 : }
483 2754 : length -= (15 + 7);
484 : }
485 9057 : length += 15;
486 : }
487 80507 : length += 7;
488 : }
489 2074561 : length += 3;
490 :
491 2074561 : if (length == 0) {
492 0 : return -1;
493 : }
494 :
495 67912219 : for (; length > 0; --length) {
496 65837660 : if (offset > output_index) {
497 0 : return -1;
498 : }
499 65837660 : CHECK_OUTPUT_BYTES(sizeof(uint8_t));
500 65837658 : output[output_index] = output[output_index - offset];
501 65837658 : output_index += sizeof(uint8_t);
502 : }
503 : }
504 24880066 : } while ((output_index < max_output_size) && (input_index < (input_size)));
505 :
506 153 : return output_index;
507 : }
|