Line data Source code
1 : /*
2 : * Samba compression library - LGPLv3
3 : *
4 : * Copyright © Catalyst IT 2022
5 : *
6 : * Written by Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
7 : *
8 : * ** NOTE! The following LGPL license applies to this file.
9 : * ** It does NOT imply that all of Samba is released under the LGPL
10 : *
11 : * This library is free software; you can redistribute it and/or
12 : * modify it under the terms of the GNU Lesser General Public
13 : * License as published by the Free Software Foundation; either
14 : * version 3 of the License, or (at your option) any later version.
15 : *
16 : * This library is distributed in the hope that it will be useful,
17 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 : * Lesser General Public License for more details.
20 : *
21 : * You should have received a copy of the GNU Lesser General Public
22 : * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 : */
24 :
25 : #include <stdarg.h>
26 : #include <stddef.h>
27 : #include <setjmp.h>
28 : #include <cmocka.h>
29 : #include <stdbool.h>
30 : #include <sys/stat.h>
31 : #include "replace.h"
32 : #include <talloc.h>
33 : #include "lzxpress_huffman.h"
34 : #include "lib/util/stable_sort.h"
35 : #include "lib/util/data_blob.h"
36 :
37 : /* set LZXHUFF_DEBUG_FILES to true to save round-trip files in /tmp. */
38 : #define LZXHUFF_DEBUG_FILES false
39 :
40 : /* set LZXHUFF_DEBUG_VERBOSE to true to print more. */
41 : #define LZXHUFF_DEBUG_VERBOSE false
42 :
43 :
44 : #if LZXHUFF_DEBUG_VERBOSE
45 : #define debug_message(...) print_message(__VA_ARGS__)
46 :
47 : #include <time.h>
48 :
49 : struct timespec start = {0};
50 : struct timespec end = {0};
51 : static void debug_start_timer(void)
52 : {
53 : clock_gettime(CLOCK_MONOTONIC, &start);
54 : }
55 :
56 : static void debug_end_timer(const char *name, size_t len)
57 : {
58 : uint64_t ns;
59 : double secs;
60 : double rate;
61 : clock_gettime(CLOCK_MONOTONIC, &end);
62 : ns = end.tv_nsec;
63 : ns += end.tv_sec * 1000 * 1000 * 1000;
64 : ns -= start.tv_nsec;
65 : ns -= start.tv_sec * 1000 * 1000 * 1000;
66 : secs = ns / 1e9;
67 : rate = len / (secs * 1024 * 1024);
68 : debug_message("%s %zu bytes in %.2g: \033[1;35m%.2f\033[0m MB per second\n",
69 : name, len, secs, rate);
70 : }
71 :
72 : #else
73 : #define debug_message(...) /* debug_message */
74 : #define debug_start_timer(...) /* debug_start_timer */
75 : #define debug_end_timer(...) /* debug_end_timer */
76 : #endif
77 :
78 :
79 : struct lzx_pair {
80 : const char *name;
81 : DATA_BLOB compressed;
82 : DATA_BLOB decompressed;
83 : };
84 :
85 : struct lzx_file_pair {
86 : const char *name;
87 : const char *compressed_file;
88 : const char *decompressed_file;
89 : };
90 :
91 :
92 : #define DECOMP_DIR "testdata/compression/decompressed"
93 : #define COMP_DIR "testdata/compression/compressed-huffman"
94 : #define MORE_COMP_DIR "testdata/compression/compressed-more-huffman"
95 :
96 :
97 : #define VARRGH(...) __VA_ARGS__
98 :
99 : #define BLOB_FROM_ARRAY(...) \
100 : { \
101 : .data = (uint8_t[]){__VA_ARGS__}, \
102 : .length = sizeof((uint8_t[]){__VA_ARGS__}) \
103 : }
104 :
105 : #define BLOB_FROM_STRING(s) \
106 : { \
107 : .data = discard_const_p(uint8_t, s), \
108 : .length = (sizeof(s) - 1) \
109 : }
110 :
111 :
112 : const char * file_names[] = {
113 : "27826-8.txt",
114 : "5d049b4cb1bd933f5e8ex19",
115 : "638e61e96d54279981c3x5",
116 : "64k-minus-one-zeros",
117 : "64k-plus-one-zeros",
118 : "64k-zeros",
119 : "96f696a4e5ce56c61a3dx10",
120 : "9e0b6a12febf38e98f13",
121 : "abc-times-101",
122 : "abc-times-105",
123 : "abc-times-200",
124 : "and_rand",
125 : "and_rand-128k+",
126 : "b63289ccc7f218c0d56b",
127 : "beta-variate1-128k+",
128 : "beta-variate2-128k+",
129 : "beta-variate3-128k+",
130 : "decayed_alphabet_128k+",
131 : "decayed_alphabet_64k",
132 : "exp_shuffle",
133 : "exp_shuffle-128k+",
134 : "f00842317dc6d5695b02",
135 : "fib_shuffle",
136 : "fib_shuffle-128k+",
137 : "fuzzing-0fc2d461b56cd8103c91",
138 : "fuzzing-17c961778538cc10ab7c",
139 : "fuzzing-3591f9dc02bb00a54b60",
140 : "fuzzing-3ec3bca27bb9eb40c128",
141 : "fuzzing-80b4fa18ff5f8dd04862",
142 : "fuzzing-a3115a81d1ac500318f9",
143 : "generate-windows-test-vectors.c",
144 : "midsummer-nights-dream.txt",
145 : "notes-on-the-underground.txt",
146 : "pg22009.txt",
147 : "repeating",
148 : "repeating-exactly-64k",
149 : "setup.log",
150 : "skewed_choices",
151 : "skewed_choices-128k+",
152 : /* These ones were deathly slow in fuzzing at one point */
153 : "slow-015ddc36a71412ccc50d",
154 : "slow-100e9f966a7feb9ca40a",
155 : "slow-2a671c3cff4f1574cbab",
156 : "slow-33d90a24e70515b14cd0",
157 : "slow-49d8c05261e3f412fc72",
158 : "slow-50a249d2fe56873e56a0",
159 : "slow-63e9f0b52235fb0129fa",
160 : "slow-73b7f971d65908ac0095",
161 : "slow-8b61e3dd267908544531",
162 : "slow-9d1c5a079b0462986f1f",
163 : "slow-aa7262a821dabdcf04a6",
164 : "slow-b8a91d142b0d2af7f5ca",
165 : "slow-c79142457734bbc8d575",
166 : "slow-d736544545b90d83fe75",
167 : "slow-e3b9bdfaed7d1a606fdb",
168 : "slow-f3f1c02a9d006e5e1703",
169 : "square_series",
170 : "square_series-128k+",
171 : "trigram_128k+",
172 : "trigram_64k",
173 : "trigram_sum_128k+",
174 : "trigram_sum_64k",
175 : NULL
176 : };
177 :
178 : struct lzx_pair bidirectional_pairs[] = {
179 :
180 : {.name = "abc__100_repeats", /* [MS-XCA] 3.2 example 2. */
181 : .decompressed = BLOB_FROM_STRING(
182 : "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
183 : "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
184 : "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
185 : "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
186 : "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
187 : ),
188 : .compressed = BLOB_FROM_ARRAY(
189 : /*
190 : * The 'a', 'b', and 'c' bytes are 0x61, 0x62, 0x63. No other
191 : * symbols occur. That means we need 48 0x00 bytes for the
192 : * first 96 symbol-nybbles, then some short codes, then zeros
193 : * again for the rest of the literals.
194 : */
195 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
196 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
197 : 0,0,0,0,0, 0,0,0,
198 : 0x30, 0x23, /* a_ cb */
199 : 0,0,0,0,0, 0,0,0,0,0,
200 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
201 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 100 bytes */
202 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
203 : 0,0,0,0,0, 0,0,0, /* here end the 0-255 literals (128 bytes) */
204 : 0x02, /* 'EOF' symbol 256 (byte 128 low) */
205 : 0,0,0,0,0, 0,0,0,0,0, 0, /* 140 bytes */
206 : 0,0,0,
207 : 0x20, /* codepoint 287 (byte 143 high) */
208 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0, /* 160 bytes */
209 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
210 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
211 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
212 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 240 bytes */
213 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,
214 : /*
215 : * So that's the tree.
216 : *
217 : * length 2 codes for 'c', EOF, 287
218 : * length 3 for 'a', 'b'.
219 : *
220 : * c: 00
221 : * EOF: 01
222 : * 287: 10
223 : * a: 110
224 : * b: 111
225 : *
226 : * thus the literal string "abc" is 110-111-00.
227 : *
228 : * Now for the lz77 match definitions for EOF and 287.
229 : *
230 : * Why 287? It encodes the match distance and offset.
231 : *
232 : * 287 - 256 = 31
233 : *
234 : * _length = 31 % 16 = 15
235 : * _distance = 31 / 16 = 1
236 : *
237 : * (it's easier to look at the hex, 0x11f:
238 : * 1xx means a match; x1x is _distance; xxf is _length)
239 : *
240 : * _distance 1 means a two bit distance (10 or 11; 2 or 3).
241 : * That means the next bit will be the least significant bit
242 : * of distance (1 in this case, meaning distance 3).
243 : *
244 : * if _length is 15, real length is included inline.
245 : *
246 : * 'EOF' == 256 means _length = 0, _distance = 0.
247 : *
248 : * _distance 0 means 1, so no further bits needed.
249 : * _length 0 means length 3.
250 : *
251 : * but when used as EOF, this doesn't matter.
252 : */
253 : 0xa8, 0xdc, 0x00, 0x00, 0xff, 0x26, 0x01
254 : /* These remaining bytes are:
255 : *
256 : * 10101000 11011100 00000000 00000000 11111111
257 : * 00100110 00000001
258 : *
259 : * and we read them as 16 chunks (i.e. flipping odd/even order)
260 : *
261 : * 110-111-00 10-1-01-000
262 : * a b c 287 | EOF
263 : * |
264 : * this is the 287 distance low bit.
265 : *
266 : * The last 3 bits are not used. The 287 length is sort of
267 : * out of band, coming up soon (because 287 encodes length
268 : * 15; most codes do not do this).
269 : *
270 : * 00000000 00000000
271 : *
272 : * This padding is there because the first 32 bits are read
273 : * at the beginning of decoding. If there were more things to
274 : * be encoded, they would be in here.
275 : *
276 : * 11111111
277 : *
278 : * This byte is pulled as the length for the 287 match.
279 : * Because it is 0xff, we pull a further 2 bytes for the
280 : * actual length, i.e. a 16 bit number greater than 270.
281 : *
282 : * 00000001 00100110
283 : *
284 : * that is 0x126 = 294 = the match length - 3 (because we're
285 : * encoding ["abc", <copy from 3 back, 297 chars>, EOF]).
286 : *
287 : */
288 : )
289 : },
290 : {.name = "abcdefghijklmnopqrstuvwxyz", /* [MS-XCA] 3.2 example 1. */
291 : .decompressed = BLOB_FROM_STRING("abcdefghijklmnopqrstuvwxyz"),
292 : .compressed = BLOB_FROM_ARRAY(
293 : /*
294 : * In this case there are no matches encoded as there are no
295 : * repeated symbols. Including the EOF, there are 27 symbols
296 : * all occurring exactly as frequently as each other (once).
297 : * From that we would expect the codes to be mostly 5 bits
298 : * long, because 27 < 2^5 (32), but greater than 2^4. And
299 : * that's what we see.
300 : */
301 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
302 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
303 : 0,0,0,0,0, 0,0,0,
304 : /* 14 non-zero bytes for 26 letters/nibbles */
305 : 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
306 : 0x55, 0x55, 0x55, 0x45, 0x44, 0x04,
307 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0, /* 80 */
308 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
309 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
310 : 0,0,0,0,0, 0,0,0,
311 : 0x04, /* 0x100 EOF */
312 : /* no matches */
313 : 0,0,0,0,0, 0,0,0,0,0, 0, /* 140 */
314 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
315 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
316 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
317 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
318 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 240 */
319 : 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,
320 :
321 : 0xd8, 0x52, 0x3e, 0xd7, 0x94, 0x11, 0x5b, 0xe9,
322 : 0x19, 0x5f, 0xf9, 0xd6, 0x7c, 0xdf, 0x8d, 0x04,
323 : 0x00, 0x00, 0x00, 0x00)
324 : },
325 : {0}
326 : };
327 :
328 :
329 1 : static void test_lzxpress_huffman_decompress(void **state)
330 : {
331 1 : size_t i;
332 1 : ssize_t written;
333 1 : uint8_t *dest = NULL;
334 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
335 4 : for (i = 0; bidirectional_pairs[i].name != NULL; i++) {
336 2 : struct lzx_pair p = bidirectional_pairs[i];
337 2 : dest = talloc_array(mem_ctx, uint8_t, p.decompressed.length);
338 :
339 : debug_message("%s compressed %zu decomp %zu\n", p.name,
340 : p.compressed.length,
341 2 : p.decompressed.length);
342 :
343 2 : written = lzxpress_huffman_decompress(p.compressed.data,
344 : p.compressed.length,
345 : dest,
346 : p.decompressed.length);
347 2 : assert_int_not_equal(written, -1);
348 2 : assert_int_equal(written, p.decompressed.length);
349 :
350 2 : assert_memory_equal(dest, p.decompressed.data, p.decompressed.length);
351 2 : talloc_free(dest);
352 : }
353 1 : }
354 :
355 1 : static void test_lzxpress_huffman_compress(void **state)
356 : {
357 1 : size_t i;
358 1 : ssize_t written;
359 1 : uint8_t *dest = NULL;
360 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
361 4 : for (i = 0; bidirectional_pairs[i].name != NULL; i++) {
362 2 : struct lzx_pair p = bidirectional_pairs[i];
363 : debug_message("%s compressed %zu decomp %zu\n", p.name,
364 : p.compressed.length,
365 2 : p.decompressed.length);
366 :
367 2 : written = lzxpress_huffman_compress_talloc(mem_ctx,
368 : p.decompressed.data,
369 : p.decompressed.length,
370 : &dest);
371 :
372 2 : assert_int_not_equal(written, -1);
373 2 : assert_int_equal(written, p.compressed.length);
374 2 : assert_memory_equal(dest, p.compressed.data, p.compressed.length);
375 2 : talloc_free(dest);
376 : }
377 1 : }
378 :
379 :
380 367 : static DATA_BLOB datablob_from_file(TALLOC_CTX *mem_ctx,
381 : const char *filename)
382 : {
383 367 : DATA_BLOB b = {0};
384 367 : FILE *fh = fopen(filename, "rb");
385 367 : int ret;
386 367 : struct stat s;
387 367 : size_t len;
388 367 : if (fh == NULL) {
389 22 : debug_message("could not open '%s'\n", filename);
390 22 : return b;
391 : }
392 345 : ret = fstat(fileno(fh), &s);
393 345 : if (ret != 0) {
394 0 : fclose(fh);
395 0 : return b;
396 : }
397 345 : b.data = talloc_array(mem_ctx, uint8_t, s.st_size);
398 345 : if (b.data == NULL) {
399 0 : fclose(fh);
400 0 : return b;
401 : }
402 345 : len = fread(b.data, 1, s.st_size, fh);
403 345 : if (ferror(fh) || len != s.st_size) {
404 0 : TALLOC_FREE(b.data);
405 : } else {
406 : b.length = len;
407 : }
408 345 : fclose(fh);
409 345 : return b;
410 : }
411 :
412 :
413 :
414 1 : static void test_lzxpress_huffman_decompress_files(void **state)
415 : {
416 1 : size_t i;
417 1 : int score = 0;
418 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
419 63 : for (i = 0; file_names[i] != NULL; i++) {
420 61 : char filename[200];
421 61 : uint8_t *dest = NULL;
422 61 : ssize_t written;
423 61 : TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
424 61 : struct lzx_pair p = {
425 61 : .name = file_names[i]
426 : };
427 :
428 61 : debug_message("%s\n", p.name);
429 :
430 61 : snprintf(filename, sizeof(filename),
431 : "%s/%s.decomp", DECOMP_DIR, p.name);
432 :
433 61 : p.decompressed = datablob_from_file(tmp_ctx, filename);
434 61 : assert_non_null(p.decompressed.data);
435 :
436 61 : snprintf(filename, sizeof(filename),
437 : "%s/%s.lzhuff", COMP_DIR, p.name);
438 :
439 61 : p.compressed = datablob_from_file(tmp_ctx, filename);
440 61 : assert_non_null(p.compressed.data);
441 :
442 61 : dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
443 61 : debug_start_timer();
444 61 : written = lzxpress_huffman_decompress(p.compressed.data,
445 : p.compressed.length,
446 : dest,
447 : p.decompressed.length);
448 61 : debug_end_timer("decompress", p.decompressed.length);
449 61 : if (written != -1 &&
450 61 : written == p.decompressed.length &&
451 61 : memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
452 61 : debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
453 61 : score++;
454 : } else {
455 : debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
456 61 : p.name);
457 : debug_message("size %zd vs reference %zu\n",
458 61 : written, p.decompressed.length);
459 : }
460 61 : talloc_free(tmp_ctx);
461 : }
462 1 : debug_message("%d/%zu correct\n", score, i);
463 1 : assert_int_equal(score, i);
464 1 : }
465 :
466 :
467 1 : static void test_lzxpress_huffman_decompress_more_compressed_files(void **state)
468 : {
469 : /*
470 : * This tests the decompression of files that have been compressed on
471 : * Windows with the level turned up (to 1, default for MS-XCA is 0).
472 : *
473 : * The format is identical, but it will have tried harder to find
474 : * matches.
475 : */
476 1 : size_t i;
477 1 : int score = 0;
478 1 : int found = 0;
479 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
480 63 : for (i = 0; file_names[i] != NULL; i++) {
481 61 : char filename[200];
482 61 : uint8_t *dest = NULL;
483 61 : ssize_t written;
484 61 : TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
485 61 : struct lzx_pair p = {
486 61 : .name = file_names[i]
487 : };
488 :
489 61 : debug_message("%s\n", p.name);
490 :
491 61 : snprintf(filename, sizeof(filename),
492 : "%s/%s.decomp", DECOMP_DIR, p.name);
493 :
494 61 : p.decompressed = datablob_from_file(tmp_ctx, filename);
495 61 : assert_non_null(p.decompressed.data);
496 :
497 61 : snprintf(filename, sizeof(filename),
498 : "%s/%s.lzhuff", MORE_COMP_DIR, p.name);
499 :
500 61 : p.compressed = datablob_from_file(tmp_ctx, filename);
501 61 : if (p.compressed.data == NULL) {
502 : /*
503 : * We don't have all the vectors in the
504 : * more-compressed directory, which is OK, we skip
505 : * them.
506 : */
507 22 : continue;
508 : }
509 39 : found++;
510 39 : dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
511 39 : debug_start_timer();
512 39 : written = lzxpress_huffman_decompress(p.compressed.data,
513 : p.compressed.length,
514 : dest,
515 : p.decompressed.length);
516 39 : debug_end_timer("decompress", p.decompressed.length);
517 39 : if (written == p.decompressed.length &&
518 39 : memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
519 39 : debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
520 39 : score++;
521 : } else {
522 : debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
523 39 : p.name);
524 : debug_message("size %zd vs reference %zu\n",
525 39 : written, p.decompressed.length);
526 : }
527 39 : talloc_free(tmp_ctx);
528 : }
529 1 : debug_message("%d/%d correct\n", score, found);
530 1 : assert_int_equal(score, found);
531 1 : }
532 :
533 :
534 : /*
535 : * attempt_round_trip() tests whether a data blob can survive a compression
536 : * and decompression cycle. If save_name is not NULL and LZXHUFF_DEBUG_FILES
537 : * evals to true, the various stages are saved in files with that name and the
538 : * '-original', '-compressed', and '-decompressed' suffixes. If ref_compressed
539 : * has data, it'll print a message saying whether the compressed data matches
540 : * that.
541 : */
542 :
543 299 : static ssize_t attempt_round_trip(TALLOC_CTX *mem_ctx,
544 : DATA_BLOB original,
545 : const char *save_name,
546 : DATA_BLOB ref_compressed)
547 : {
548 299 : TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
549 299 : DATA_BLOB compressed = data_blob_talloc(tmp_ctx, NULL,
550 : original.length * 4 / 3 + 260);
551 299 : DATA_BLOB decompressed = data_blob_talloc(tmp_ctx, NULL,
552 : original.length);
553 299 : ssize_t comp_written, decomp_written;
554 299 : debug_start_timer();
555 299 : comp_written = lzxpress_huffman_compress_talloc(tmp_ctx,
556 : original.data,
557 : original.length,
558 : &compressed.data);
559 299 : debug_end_timer("compress", original.length);
560 299 : if (comp_written <= 0) {
561 0 : talloc_free(tmp_ctx);
562 0 : return -1;
563 : }
564 :
565 299 : if (ref_compressed.data != NULL) {
566 : /*
567 : * This is informational, not an assertion; there are
568 : * ~infinite legitimate ways to compress the data, many as
569 : * good as each other (think of compression as a language, not
570 : * a format).
571 : */
572 : debug_message("compressed size %zd vs reference %zu\n",
573 : comp_written, ref_compressed.length);
574 :
575 : if (comp_written == compressed.length &&
576 : memcmp(compressed.data, ref_compressed.data, comp_written) == 0) {
577 299 : debug_message("\033[1;32mbyte identical!\033[0m\n");
578 : }
579 : }
580 299 : debug_start_timer();
581 299 : decomp_written = lzxpress_huffman_decompress(compressed.data,
582 : comp_written,
583 : decompressed.data,
584 : original.length);
585 299 : debug_end_timer("decompress", original.length);
586 299 : if (save_name != NULL && LZXHUFF_DEBUG_FILES) {
587 : char s[300];
588 : FILE *fh = NULL;
589 :
590 : snprintf(s, sizeof(s), "%s-original", save_name);
591 : fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
592 : fh = fopen(s, "w");
593 : fwrite(original.data, 1, original.length, fh);
594 : fclose(fh);
595 :
596 : snprintf(s, sizeof(s), "%s-compressed", save_name);
597 : fprintf(stderr, "Saving %zu bytes to %s\n", comp_written, s);
598 : fh = fopen(s, "w");
599 : fwrite(compressed.data, 1, comp_written, fh);
600 : fclose(fh);
601 : /*
602 : * We save the decompressed file using original.length, not
603 : * the returned size. If these differ, the returned size will
604 : * be -1. By saving the whole buffer we can see at what point
605 : * it went haywire.
606 : */
607 : snprintf(s, sizeof(s), "%s-decompressed", save_name);
608 : fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
609 : fh = fopen(s, "w");
610 : fwrite(decompressed.data, 1, original.length, fh);
611 : fclose(fh);
612 : }
613 :
614 299 : if (original.length != decomp_written ||
615 299 : memcmp(decompressed.data,
616 : original.data,
617 : original.length) != 0) {
618 : debug_message("\033[1;31mgot %zd, expected %zu\033[0m\n",
619 : decomp_written,
620 0 : original.length);
621 0 : talloc_free(tmp_ctx);
622 0 : return -1;
623 : }
624 299 : talloc_free(tmp_ctx);
625 299 : return comp_written;
626 : }
627 :
628 :
629 1 : static void test_lzxpress_huffman_round_trip(void **state)
630 : {
631 1 : size_t i;
632 1 : int score = 0;
633 1 : ssize_t compressed_total = 0;
634 1 : ssize_t reference_total = 0;
635 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
636 63 : for (i = 0; file_names[i] != NULL; i++) {
637 61 : char filename[200];
638 61 : char *debug_files = NULL;
639 61 : TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
640 61 : ssize_t comp_size;
641 61 : struct lzx_pair p = {
642 61 : .name = file_names[i]
643 : };
644 61 : debug_message("-------------------\n");
645 61 : debug_message("%s\n", p.name);
646 :
647 61 : snprintf(filename, sizeof(filename),
648 : "%s/%s.decomp", DECOMP_DIR, p.name);
649 :
650 61 : p.decompressed = datablob_from_file(tmp_ctx, filename);
651 61 : assert_non_null(p.decompressed.data);
652 :
653 61 : snprintf(filename, sizeof(filename),
654 : "%s/%s.lzhuff", COMP_DIR, p.name);
655 :
656 61 : p.compressed = datablob_from_file(tmp_ctx, filename);
657 61 : if (p.compressed.data == NULL) {
658 : debug_message(
659 : "Could not load %s reference file %s\n",
660 : p.name, filename);
661 : debug_message("%s decompressed %zu\n", p.name,
662 : p.decompressed.length);
663 : } else {
664 : debug_message("%s: reference compressed %zu decomp %zu\n",
665 : p.name,
666 : p.compressed.length,
667 61 : p.decompressed.length);
668 : }
669 61 : if (1) {
670 : /*
671 : * We're going to save copies in /tmp.
672 : */
673 61 : snprintf(filename, sizeof(filename),
674 : "/tmp/lzxhuffman-%s", p.name);
675 61 : debug_files = filename;
676 : }
677 :
678 61 : comp_size = attempt_round_trip(mem_ctx, p.decompressed,
679 : debug_files,
680 : p.compressed);
681 61 : if (comp_size > 0) {
682 61 : debug_message("\033[1;32mround trip!\033[0m\n");
683 61 : score++;
684 61 : if (p.compressed.length) {
685 61 : compressed_total += comp_size;
686 61 : reference_total += p.compressed.length;
687 : }
688 : }
689 61 : talloc_free(tmp_ctx);
690 : }
691 1 : debug_message("%d/%zu correct\n", score, i);
692 1 : print_message("\033[1;34mtotal compressed size: %zu\033[0m\n",
693 : compressed_total);
694 1 : print_message("total reference size: %zd \n", reference_total);
695 1 : print_message("diff: %7zd \n",
696 : reference_total - compressed_total);
697 1 : assert_true(reference_total != 0);
698 1 : print_message("ratio: \033[1;3%dm%.2f\033[0m \n",
699 : 2 + (compressed_total >= reference_total),
700 1 : ((double)compressed_total) / reference_total);
701 : /*
702 : * Assert that the compression is *about* as good as Windows. Of course
703 : * it doesn't matter if we do better, but mysteriously getting better
704 : * is usually a sign that something is wrong.
705 : *
706 : * At the time of writing, compressed_total is 2674004, or 10686 more
707 : * than the Windows reference total. That's < 0.5% difference, we're
708 : * asserting at 2%.
709 : */
710 1 : assert_true(labs(compressed_total - reference_total) <
711 : compressed_total / 50);
712 :
713 1 : assert_int_equal(score, i);
714 1 : talloc_free(mem_ctx);
715 1 : }
716 :
717 : /*
718 : * Bob Jenkins' Small Fast RNG.
719 : *
720 : * We don't need it to be this good, but we do need it to be reproduceable
721 : * across platforms, which rand() etc aren't.
722 : *
723 : * http://burtleburtle.net/bob/rand/smallprng.html
724 : */
725 :
726 : struct jsf_rng {
727 : uint32_t a;
728 : uint32_t b;
729 : uint32_t c;
730 : uint32_t d;
731 : };
732 :
733 : #define ROTATE32(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
734 :
735 26542191 : static uint32_t jsf32(struct jsf_rng *x) {
736 26542191 : uint32_t e = x->a - ROTATE32(x->b, 27);
737 26542191 : x->a = x->b ^ ROTATE32(x->c, 17);
738 26542191 : x->b = x->c + x->d;
739 26542191 : x->c = x->d + e;
740 26542191 : x->d = e + x->a;
741 26542191 : return x->d;
742 : }
743 :
744 5 : static void jsf32_init(struct jsf_rng *x, uint32_t seed) {
745 5 : size_t i;
746 5 : x->a = 0xf1ea5eed;
747 5 : x->b = x->c = x->d = seed;
748 126 : for (i = 0; i < 20; ++i) {
749 120 : jsf32(x);
750 : }
751 : }
752 :
753 :
754 1 : static void test_lzxpress_huffman_long_gpl_round_trip(void **state)
755 : {
756 : /*
757 : * We use a kind of model-free Markov model to generate a massively
758 : * extended pastiche of the GPLv3 (chosen because it is right there in
759 : * "COPYING" and won't change often).
760 : *
761 : * The point is to check a round trip of a very long message with
762 : * multiple repetitions on many scales, without having to add a very
763 : * large file.
764 : */
765 1 : size_t i, j, k;
766 1 : uint8_t c;
767 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
768 1 : DATA_BLOB gpl = datablob_from_file(mem_ctx, "COPYING");
769 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
770 1 : DATA_BLOB ref = {0};
771 1 : ssize_t comp_size;
772 1 : struct jsf_rng rng;
773 :
774 1 : if (gpl.data == NULL) {
775 0 : print_message("could not read COPYING\n");
776 0 : fail();
777 : }
778 :
779 1 : jsf32_init(&rng, 1);
780 :
781 1 : j = 1;
782 1 : original.data[0] = gpl.data[0];
783 5242880 : for (i = 1; i < original.length; i++) {
784 5242879 : size_t m;
785 5242879 : char p = original.data[i - 1];
786 5242879 : c = gpl.data[j];
787 5242879 : original.data[i] = c;
788 5242879 : j++;
789 5242879 : m = (j + jsf32(&rng)) % (gpl.length - 50);
790 147802506 : for (k = m; k < m + 30; k++) {
791 143460910 : if (p == gpl.data[k] &&
792 9104961 : c == gpl.data[k + 1]) {
793 901283 : j = k + 2;
794 901283 : break;
795 : }
796 : }
797 5242879 : if (j == gpl.length) {
798 62 : j = 1;
799 : }
800 : }
801 :
802 1 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/gpl", ref);
803 1 : assert_true(comp_size > 0);
804 1 : assert_true(comp_size < original.length);
805 :
806 1 : talloc_free(mem_ctx);
807 1 : }
808 :
809 :
810 2 : static void test_lzxpress_huffman_long_random_graph_round_trip(void **state)
811 : {
812 2 : size_t i;
813 2 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
814 2 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
815 2 : DATA_BLOB ref = {0};
816 : /*
817 : * There's a random trigram graph, with each pair of sequential bytes
818 : * pointing to a successor. This would probably fall into a fairly
819 : * simple loop, but we introduce damage into the system, randomly
820 : * flipping about 1 bit in 64.
821 : *
822 : * The result is semi-structured and compressible.
823 : */
824 2 : uint8_t *d = original.data;
825 2 : uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
826 2 : uint32_t *table32 = (void*)table;
827 2 : ssize_t comp_size;
828 2 : struct jsf_rng rng;
829 :
830 2 : jsf32_init(&rng, 1);
831 32770 : for (i = 0; i < (65536 / 4); i++) {
832 32768 : table32[i] = jsf32(&rng);
833 : }
834 :
835 2 : d[0] = 'a';
836 2 : d[1] = 'b';
837 :
838 10485758 : for (i = 2; i < original.length; i++) {
839 10485756 : uint16_t k = (d[i - 2] << 8) | d[i - 1];
840 10485756 : uint32_t damage = jsf32(&rng) & jsf32(&rng) & jsf32(&rng);
841 10485756 : damage &= (damage >> 16);
842 10485756 : k ^= damage & 0xffff;
843 10485756 : d[i] = table[k];
844 : }
845 :
846 2 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-graph", ref);
847 2 : assert_true(comp_size > 0);
848 2 : assert_true(comp_size < original.length);
849 :
850 2 : talloc_free(mem_ctx);
851 2 : }
852 :
853 :
854 1 : static void test_lzxpress_huffman_chaos_graph_round_trip(void **state)
855 : {
856 1 : size_t i;
857 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
858 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
859 1 : DATA_BLOB ref = {0};
860 : /*
861 : * There's a random trigram graph, with each pair of sequential bytes
862 : * pointing to a successor. This would probably fall into a fairly
863 : * simple loop, but we keep changing the graph. The result is long
864 : * periods of stability separatd by bursts of noise.
865 : */
866 1 : uint8_t *d = original.data;
867 1 : uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
868 1 : uint32_t *table32 = (void*)table;
869 1 : ssize_t comp_size;
870 1 : struct jsf_rng rng;
871 :
872 1 : jsf32_init(&rng, 1);
873 16385 : for (i = 0; i < (65536 / 4); i++) {
874 16384 : table32[i] = jsf32(&rng);
875 : }
876 :
877 1 : d[0] = 'a';
878 1 : d[1] = 'b';
879 :
880 5242879 : for (i = 2; i < original.length; i++) {
881 5242878 : uint16_t k = (d[i - 2] << 8) | d[i - 1];
882 5242878 : uint32_t damage = jsf32(&rng);
883 5242878 : d[i] = table[k];
884 5242878 : if ((damage >> 29) == 0) {
885 655652 : uint16_t index = damage & 0xffff;
886 655652 : uint8_t value = (damage >> 16) & 0xff;
887 655652 : table[index] = value;
888 : }
889 : }
890 :
891 1 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/chaos-graph", ref);
892 1 : assert_true(comp_size > 0);
893 1 : assert_true(comp_size < original.length);
894 :
895 1 : talloc_free(mem_ctx);
896 1 : }
897 :
898 :
899 1 : static void test_lzxpress_huffman_sparse_random_graph_round_trip(void **state)
900 : {
901 1 : size_t i;
902 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
903 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
904 1 : DATA_BLOB ref = {0};
905 : /*
906 : * There's a random trigram graph, with each pair of sequential bytes
907 : * pointing to a successor. This will fall into a fairly simple loops,
908 : * but we introduce damage into the system, randomly mangling about 1
909 : * byte in 65536.
910 : *
911 : * The result has very long repetitive runs, which should lead to
912 : * oversized blocks.
913 : */
914 1 : uint8_t *d = original.data;
915 1 : uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
916 1 : uint32_t *table32 = (void*)table;
917 1 : ssize_t comp_size;
918 1 : struct jsf_rng rng;
919 :
920 1 : jsf32_init(&rng, 3);
921 16385 : for (i = 0; i < (65536 / 4); i++) {
922 16384 : table32[i] = jsf32(&rng);
923 : }
924 :
925 1 : d[0] = 'a';
926 1 : d[1] = 'b';
927 :
928 5242879 : for (i = 2; i < original.length; i++) {
929 5242878 : uint16_t k = (d[i - 2] << 8) | d[i - 1];
930 5242878 : uint32_t damage = jsf32(&rng);
931 5242878 : if ((damage & 0xffff0000) == 0) {
932 77 : k ^= damage & 0xffff;
933 : }
934 5242878 : d[i] = table[k];
935 : }
936 :
937 1 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/sparse-random-graph", ref);
938 1 : assert_true(comp_size > 0);
939 1 : assert_true(comp_size < original.length);
940 :
941 1 : talloc_free(mem_ctx);
942 1 : }
943 :
944 :
945 1 : static void test_lzxpress_huffman_random_noise_round_trip(void **state)
946 : {
947 1 : size_t i;
948 1 : size_t len = 1024 * 1024;
949 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
950 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
951 1 : DATA_BLOB ref = {0};
952 1 : ssize_t comp_size;
953 : /*
954 : * We are filling this up with incompressible noise, but we can assert
955 : * quite tight bounds on how badly it will fail to compress.
956 : *
957 : * Specifically, with randomly distributed codes, the Huffman table
958 : * should come out as roughly even, averaging 8 bit codes. Then there
959 : * will be a 256 byte table every 64k, which is a 1/256 overhead (i.e.
960 : * the compressed length will be 257/256 the original *on average*).
961 : * We assert it is less than 1 in 200 but more than 1 in 300.
962 : */
963 1 : uint32_t *d32 = (uint32_t*)((void*)original.data);
964 1 : struct jsf_rng rng;
965 1 : jsf32_init(&rng, 2);
966 :
967 262145 : for (i = 0; i < (len / 4); i++) {
968 262144 : d32[i] = jsf32(&rng);
969 : }
970 :
971 1 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-noise", ref);
972 1 : assert_true(comp_size > 0);
973 1 : assert_true(comp_size > original.length + original.length / 300);
974 1 : assert_true(comp_size < original.length + original.length / 200);
975 : debug_message("original size %zu; compressed size %zd; ratio %.3f\n",
976 1 : len, comp_size, ((double)comp_size) / len);
977 :
978 1 : talloc_free(mem_ctx);
979 1 : }
980 :
981 :
982 1 : static void test_lzxpress_huffman_overlong_matches(void **state)
983 : {
984 1 : size_t i, j = 0;
985 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
986 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 1024 * 1024);
987 1 : DATA_BLOB ref = {0};
988 1 : uint8_t *d = original.data;
989 1 : char filename[300];
990 : /*
991 : * We are testing with something like "aaaaaaaaaaaaaaaaaaaaaaabbbbb"
992 : * where typically the number of "a"s is > 65536, and the number of
993 : * "b"s is < 42.
994 : */
995 1 : ssize_t na[] = {65535, 65536, 65537, 65559, 65575, 200000, -1};
996 1 : ssize_t nb[] = {1, 2, 20, 39, 40, 41, 42, -1};
997 1 : int score = 0;
998 1 : ssize_t comp_size;
999 :
1000 7 : for (i = 0; na[i] >= 0; i++) {
1001 6 : ssize_t a = na[i];
1002 6 : memset(d, 'a', a);
1003 48 : for (j = 0; nb[j] >= 0; j++) {
1004 42 : ssize_t b = nb[j];
1005 42 : memset(d + a, 'b', b);
1006 42 : original.length = a + b;
1007 42 : snprintf(filename, sizeof(filename),
1008 : "/tmp/overlong-%zd-%zd", a, b);
1009 42 : comp_size = attempt_round_trip(mem_ctx,
1010 : original,
1011 : filename, ref);
1012 42 : if (comp_size > 0) {
1013 42 : score++;
1014 : }
1015 : }
1016 : }
1017 1 : debug_message("%d/%zu correct\n", score, i * j);
1018 1 : assert_int_equal(score, i * j);
1019 1 : talloc_free(mem_ctx);
1020 1 : }
1021 :
1022 :
1023 1 : static void test_lzxpress_huffman_overlong_matches_abc(void **state)
1024 : {
1025 1 : size_t i, j = 0, k = 0;
1026 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
1027 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 1024 * 1024);
1028 1 : DATA_BLOB ref = {0};
1029 1 : uint8_t *d = original.data;
1030 1 : char filename[300];
1031 : /*
1032 : * We are testing with something like "aaaabbbbcc" where typically
1033 : * the number of "a"s + "b"s is around 65536, and the number of "c"s
1034 : * is < 43.
1035 : */
1036 1 : ssize_t nab[] = {1, 21, 32767, 32768, 32769, -1};
1037 1 : ssize_t nc[] = {1, 2, 20, 39, 40, 41, 42, -1};
1038 1 : int score = 0;
1039 1 : ssize_t comp_size;
1040 :
1041 6 : for (i = 0; nab[i] >= 0; i++) {
1042 5 : ssize_t a = nab[i];
1043 5 : memset(d, 'a', a);
1044 30 : for (j = 0; nab[j] >= 0; j++) {
1045 25 : ssize_t b = nab[j];
1046 25 : memset(d + a, 'b', b);
1047 200 : for (k = 0; nc[k] >= 0; k++) {
1048 175 : ssize_t c = nc[k];
1049 175 : memset(d + a + b, 'c', c);
1050 175 : original.length = a + b + c;
1051 175 : snprintf(filename, sizeof(filename),
1052 : "/tmp/overlong-abc-%zd-%zd-%zd",
1053 : a, b, c);
1054 175 : comp_size = attempt_round_trip(mem_ctx,
1055 : original,
1056 : filename, ref);
1057 175 : if (comp_size > 0) {
1058 175 : score++;
1059 : }
1060 : }
1061 : }
1062 : }
1063 1 : debug_message("%d/%zu correct\n", score, i * j * k);
1064 1 : assert_int_equal(score, i * j * k);
1065 1 : talloc_free(mem_ctx);
1066 1 : }
1067 :
1068 :
1069 1 : static void test_lzxpress_huffman_extremely_compressible_middle(void **state)
1070 : {
1071 1 : size_t len = 192 * 1024;
1072 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
1073 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
1074 1 : DATA_BLOB ref = {0};
1075 1 : ssize_t comp_size;
1076 : /*
1077 : * When a middle block (i.e. not the first and not the last of >= 3),
1078 : * can be entirely expressed as a match starting in the previous
1079 : * block, the Huffman tree would end up with 1 element, which does not
1080 : * work for the code construction. It really wants to use both bits.
1081 : * So we need to ensure we have some way of dealing with this.
1082 : */
1083 1 : memset(original.data, 'a', 0x10000 - 1);
1084 1 : memset(original.data + 0x10000 - 1, 'b', 0x10000 + 1);
1085 1 : memset(original.data + 0x20000, 'a', 0x10000);
1086 1 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/compressible-middle", ref);
1087 1 : assert_true(comp_size > 0);
1088 1 : assert_true(comp_size < 1024);
1089 : debug_message("original size %zu; compressed size %zd; ratio %.3f\n",
1090 1 : len, comp_size, ((double)comp_size) / len);
1091 :
1092 1 : talloc_free(mem_ctx);
1093 1 : }
1094 :
1095 :
1096 1 : static void test_lzxpress_huffman_max_length_limit(void **state)
1097 : {
1098 1 : size_t len = 65 * 1024 * 1024;
1099 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
1100 1 : DATA_BLOB original = data_blob_talloc_zero(mem_ctx, len);
1101 1 : DATA_BLOB ref = {0};
1102 1 : ssize_t comp_size;
1103 : /*
1104 : * Reputedly Windows has a 64MB limit in the maximum match length it
1105 : * will encode. We follow this, and test that here with nearly 65 MB
1106 : * of zeros between two letters; this should be encoded in three
1107 : * blocks:
1108 : *
1109 : * 1. 'a', 64M × '\0'
1110 : * 2. (1M - 2) × '\0' -- finishing off what would have been the same match
1111 : * 3. 'b' EOF
1112 : *
1113 : * Which we can assert by saying the length is > 768, < 1024.
1114 : */
1115 1 : original.data[0] = 'a';
1116 1 : original.data[len - 1] = 'b';
1117 1 : comp_size = attempt_round_trip(mem_ctx, original, "/tmp/max-length-limit", ref);
1118 1 : assert_true(comp_size > 0x300);
1119 1 : assert_true(comp_size < 0x400);
1120 : debug_message("original size %zu; compressed size %zd; ratio %.3f\n",
1121 1 : len, comp_size, ((double)comp_size) / len);
1122 :
1123 1 : talloc_free(mem_ctx);
1124 1 : }
1125 :
1126 :
1127 1 : static void test_lzxpress_huffman_short_boring_strings(void **state)
1128 : {
1129 1 : size_t len = 64 * 1024;
1130 1 : TALLOC_CTX *mem_ctx = talloc_new(NULL);
1131 1 : DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
1132 1 : DATA_BLOB ref = {0};
1133 1 : ssize_t comp_size;
1134 1 : ssize_t lengths[] = {
1135 : 1, 2, 20, 39, 40, 41, 42, 256, 270, 273, 274, 1000, 64000, -1};
1136 1 : char filename[300];
1137 1 : size_t i;
1138 : /*
1139 : * How do short repetitive strings work? We're poking at the limit
1140 : * around which LZ77 comprssion is turned on.
1141 : *
1142 : * For this test we don't change the blob memory between runs, just
1143 : * the declared length.
1144 : */
1145 1 : memset(original.data, 'a', len);
1146 14 : for (i = 0; lengths[i] >= 0; i++) {
1147 13 : original.length = lengths[i];
1148 13 : snprintf(filename, sizeof(filename),
1149 : "/tmp/short-boring-%zu",
1150 : original.length);
1151 13 : comp_size = attempt_round_trip(mem_ctx, original, filename, ref);
1152 13 : if (original.length < 41) {
1153 5 : assert_true(comp_size > 256 + original.length / 8);
1154 8 : } else if (original.length < 274) {
1155 5 : assert_true(comp_size == 261);
1156 : } else {
1157 3 : assert_true(comp_size == 263);
1158 : }
1159 13 : assert_true(comp_size < 261 + original.length / 8);
1160 : }
1161 : /* let's just show we didn't change the original */
1162 65537 : for (i = 0; i < len; i++) {
1163 65536 : if (original.data[i] != 'a') {
1164 65536 : fail_msg("input data[%zu] was changed! (%2x, expected %2x)\n",
1165 : i, original.data[i], 'a');
1166 : }
1167 : }
1168 :
1169 1 : talloc_free(mem_ctx);
1170 1 : }
1171 :
1172 :
1173 1 : static void test_lzxpress_huffman_compress_empty_or_null(void **state)
1174 1 : {
1175 : /*
1176 : * We expect these to fail with a -1, except the last one, which does
1177 : * the real thing.
1178 : */
1179 1 : ssize_t ret;
1180 1 : const uint8_t *input = bidirectional_pairs[0].decompressed.data;
1181 1 : size_t ilen = bidirectional_pairs[0].decompressed.length;
1182 1 : size_t olen = bidirectional_pairs[0].compressed.length;
1183 1 : uint8_t output[olen];
1184 1 : struct lzxhuff_compressor_mem cmp_mem;
1185 :
1186 1 : ret = lzxpress_huffman_compress(&cmp_mem, input, 0, output, olen);
1187 1 : assert_int_equal(ret, -1LL);
1188 1 : ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, output, 0);
1189 1 : assert_int_equal(ret, -1LL);
1190 :
1191 1 : ret = lzxpress_huffman_compress(&cmp_mem, NULL, ilen, output, olen);
1192 1 : assert_int_equal(ret, -1LL);
1193 1 : ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, NULL, olen);
1194 1 : assert_int_equal(ret, -1LL);
1195 1 : ret = lzxpress_huffman_compress(NULL, input, ilen, output, olen);
1196 1 : assert_int_equal(ret, -1LL);
1197 :
1198 1 : ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, output, olen);
1199 1 : assert_int_equal(ret, olen);
1200 1 : }
1201 :
1202 :
1203 1 : static void test_lzxpress_huffman_decompress_empty_or_null(void **state)
1204 1 : {
1205 : /*
1206 : * We expect these to fail with a -1, except the last one.
1207 : */
1208 1 : ssize_t ret;
1209 1 : const uint8_t *input = bidirectional_pairs[0].compressed.data;
1210 1 : size_t ilen = bidirectional_pairs[0].compressed.length;
1211 1 : size_t olen = bidirectional_pairs[0].decompressed.length;
1212 1 : uint8_t output[olen];
1213 :
1214 1 : ret = lzxpress_huffman_decompress(input, 0, output, olen);
1215 1 : assert_int_equal(ret, -1LL);
1216 1 : ret = lzxpress_huffman_decompress(input, ilen, output, 0);
1217 1 : assert_int_equal(ret, -1LL);
1218 :
1219 1 : ret = lzxpress_huffman_decompress(NULL, ilen, output, olen);
1220 1 : assert_int_equal(ret, -1LL);
1221 1 : ret = lzxpress_huffman_decompress(input, ilen, NULL, olen);
1222 1 : assert_int_equal(ret, -1LL);
1223 :
1224 1 : ret = lzxpress_huffman_decompress(input, ilen, output, olen);
1225 1 : assert_int_equal(ret, olen);
1226 1 : }
1227 :
1228 :
1229 1 : int main(void) {
1230 1 : const struct CMUnitTest tests[] = {
1231 : cmocka_unit_test(test_lzxpress_huffman_short_boring_strings),
1232 : cmocka_unit_test(test_lzxpress_huffman_max_length_limit),
1233 : cmocka_unit_test(test_lzxpress_huffman_extremely_compressible_middle),
1234 : cmocka_unit_test(test_lzxpress_huffman_long_random_graph_round_trip),
1235 : cmocka_unit_test(test_lzxpress_huffman_chaos_graph_round_trip),
1236 : cmocka_unit_test(test_lzxpress_huffman_sparse_random_graph_round_trip),
1237 : cmocka_unit_test(test_lzxpress_huffman_round_trip),
1238 : cmocka_unit_test(test_lzxpress_huffman_decompress_files),
1239 : cmocka_unit_test(test_lzxpress_huffman_decompress_more_compressed_files),
1240 : cmocka_unit_test(test_lzxpress_huffman_compress),
1241 : cmocka_unit_test(test_lzxpress_huffman_decompress),
1242 : cmocka_unit_test(test_lzxpress_huffman_long_gpl_round_trip),
1243 : cmocka_unit_test(test_lzxpress_huffman_long_random_graph_round_trip),
1244 : cmocka_unit_test(test_lzxpress_huffman_random_noise_round_trip),
1245 : cmocka_unit_test(test_lzxpress_huffman_overlong_matches_abc),
1246 : cmocka_unit_test(test_lzxpress_huffman_overlong_matches),
1247 : cmocka_unit_test(test_lzxpress_huffman_decompress_empty_or_null),
1248 : cmocka_unit_test(test_lzxpress_huffman_compress_empty_or_null),
1249 : };
1250 1 : if (!isatty(1)) {
1251 1 : cmocka_set_message_output(CM_OUTPUT_SUBUNIT);
1252 : }
1253 :
1254 1 : return cmocka_run_group_tests(tests, NULL, NULL);
1255 : }
|