Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : local testing of iconv routines. This tests the system iconv code against
5 : the built-in iconv code
6 :
7 : Copyright (C) Andrew Tridgell 2004
8 :
9 : This program is free software; you can redistribute it and/or modify
10 : it under the terms of the GNU General Public License as published by
11 : the Free Software Foundation; either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : This program is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU General Public License for more details.
18 :
19 : You should have received a copy of the GNU General Public License
20 : along with this program. If not, see <http://www.gnu.org/licenses/>.
21 : */
22 :
23 : #include "includes.h"
24 : #include "torture/torture.h"
25 : #include "system/iconv.h"
26 : #include "system/time.h"
27 : #include "libcli/raw/libcliraw.h"
28 : #include "param/param.h"
29 : #include "torture/util.h"
30 : #include "torture/local/proto.h"
31 : #include "talloc.h"
32 :
33 : #ifdef HAVE_NATIVE_ICONV
34 :
/*
  probe the system iconv library for the two conversions the tests below
  depend on: UCS-4LE to UTF-16LE and CP850 to UTF-16LE

  If either descriptor cannot be opened the test is flagged via
  torture_skip(). NOTE(review): torture_skip() is presumably a macro that
  also returns from this function - confirm in torture/torture.h.

  Returns false when both conversions could be opened.
*/
static bool iconv_untestable(struct torture_context *tctx)
{
	iconv_t probe;

	probe = iconv_open("UTF-16LE", "UCS-4LE");
	if ((iconv_t)-1 == probe) {
		torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
	}
	iconv_close(probe);

	probe = iconv_open("UTF-16LE", "CP850");
	if ((iconv_t)-1 == probe) {
		torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
	}
	iconv_close(probe);

	return false;
}
51 :
/*
  encode a single unicode codepoint as UTF-16LE using the system iconv

  The codepoint is written out as four little-endian bytes and converted
  with a UCS-4LE to UTF-16LE descriptor that is opened on first use and
  kept in a function-local static afterwards. Up to 8 bytes may be
  written to buf.

  On return *size holds the number of bytes produced. The return value
  is -1 when the descriptor cannot be opened, otherwise the result of
  iconv() converted to int (callers in this file treat non-zero as
  "skip this codepoint").
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	static iconv_t utf16_cd;
	uint8_t ucs4[4];
	char *src = (char *)ucs4;
	size_t src_left = sizeof(ucs4);
	size_t dst_left = 8;
	size_t rc;
	unsigned int byte;

	if (utf16_cd == NULL) {
		utf16_cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (utf16_cd == (iconv_t)-1) {
			/* leave the slot empty so the next call retries */
			utf16_cd = NULL;
			return -1;
		}
	}

	/* least significant byte first */
	for (byte = 0; byte < sizeof(ucs4); byte++) {
		ucs4[byte] = (codepoint >> (8 * byte)) & 0xFF;
	}

	rc = iconv(utf16_cd, &src, &src_left, &buf, &dst_left);

	*size = 8 - dst_left;

	return (int)rc;
}
85 :
86 :
/*
  work out the unicode codepoint of the first character in a buffer
  that is encoded in "charset"

  The buffer is converted to UCS-4LE with a freshly opened system iconv
  descriptor and the four output bytes are assembled into an unsigned
  int.

  Returns (unsigned int)-1 when the descriptor cannot be opened or when
  iconv() reports an error.
  NOTE(review): the output buffer holds exactly one UCS-4 character, so
  an input with more than one character presumably also takes the error
  path (output space exhausted) - confirm that callers rely on this.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in, ret;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return (unsigned int)-1;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
	iconv_close(cd);
	if (ret == (size_t) -1) {
		return (unsigned int)-1;
	}

	/* widen before shifting so the top byte never lands in a sign bit */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
112 :
/*
  display a buffer with name prefix

  Prints "name", then each of the "size" bytes of buf as two hex digits
  followed by a space, then a newline, all on stdout.
*/
static void show_buf(const char *name, const uint8_t *buf, size_t size)
{
	size_t i;

	printf("%s ", name);
	/* size_t index: the bound is a size_t, avoid a signed/unsigned compare */
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
125 :
126 : /*
127 : given a UTF-16LE buffer, test the system and built-in iconv code to
128 : make sure they do exactly the same thing in converting the buffer to
129 : "charset", then convert it back again and ensure we get the same
130 : buffer back
131 : */
132 2046528 : static bool test_buffer(struct torture_context *test,
133 : uint8_t *inbuf, size_t size, const char *charset)
134 : {
135 2046528 : uint8_t buf1[1000], buf2[1000], buf3[1000];
136 2046528 : size_t outsize1, outsize2, outsize3;
137 2046528 : const char *ptr_in1;
138 2046528 : char *ptr_in2;
139 2046528 : char *ptr_out;
140 2046528 : size_t size_in1, size_in2, size_in3;
141 2046528 : size_t ret1, ret2, ret3, len1, len2;
142 2046528 : int errno1, errno2;
143 2046528 : static iconv_t cd;
144 2046528 : static smb_iconv_t cd2, cd3;
145 2046528 : static const char *last_charset;
146 :
147 2046528 : if (cd && last_charset) {
148 2046527 : iconv_close(cd);
149 2046527 : smb_iconv_close(cd2);
150 2046527 : smb_iconv_close(cd3);
151 2046527 : cd = NULL;
152 : }
153 :
154 2046528 : if (!cd) {
155 2046528 : cd = iconv_open(charset, "UTF-16LE");
156 2046528 : if (cd == (iconv_t)-1) {
157 0 : torture_fail(test,
158 : talloc_asprintf(test,
159 : "failed to open %s to UTF-16LE",
160 : charset));
161 : }
162 2046528 : cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true));
163 2046528 : if (cd2 == (iconv_t)-1) {
164 0 : torture_fail(test,
165 : talloc_asprintf(test,
166 : "failed to open %s to UTF-16LE via smb_iconv_open_ex",
167 : charset));
168 : }
169 2046528 : cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true));
170 2046528 : if (cd3 == (iconv_t)-1) {
171 0 : torture_fail(test,
172 : talloc_asprintf(test,
173 : "failed to open UTF-16LE to %s via smb_iconv_open_ex",
174 : charset));
175 : }
176 2046528 : last_charset = charset;
177 : }
178 :
179 : /* internal convert to charset - placing result in buf1 */
180 2046528 : ptr_in1 = (const char *)inbuf;
181 2046528 : ptr_out = (char *)buf1;
182 2046528 : size_in1 = size;
183 2046528 : outsize1 = sizeof(buf1);
184 :
185 2046528 : memset(ptr_out, 0, outsize1);
186 2046528 : errno = 0;
187 2046528 : ret1 = smb_iconv(cd2, &ptr_in1, &size_in1, &ptr_out, &outsize1);
188 2046528 : errno1 = errno;
189 :
190 : /* system convert to charset - placing result in buf2 */
191 2046528 : ptr_in2 = (char *)inbuf;
192 2046528 : ptr_out = (char *)buf2;
193 2046528 : size_in2 = size;
194 2046528 : outsize2 = sizeof(buf2);
195 :
196 2046528 : memset(ptr_out, 0, outsize2);
197 2046528 : errno = 0;
198 2046528 : ret2 = iconv(cd, &ptr_in2, &size_in2, &ptr_out, &outsize2);
199 2046528 : errno2 = errno;
200 :
201 2046528 : len1 = sizeof(buf1) - outsize1;
202 2046528 : len2 = sizeof(buf2) - outsize2;
203 :
204 : /* codepoints above 1M are not interesting for now */
205 2046528 : if (len2 > len1 &&
206 0 : memcmp(buf1, buf2, len1) == 0 &&
207 0 : get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
208 0 : return true;
209 : }
210 2046528 : if (len1 > len2 &&
211 0 : memcmp(buf1, buf2, len2) == 0 &&
212 0 : get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
213 0 : return true;
214 : }
215 :
216 2046528 : torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
217 :
218 2046528 : if (errno1 != errno2) {
219 0 : show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
220 0 : show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
221 0 : torture_fail(test, talloc_asprintf(test,
222 : "errno mismatch with %s internal=%d/%s system=%d/%s",
223 : charset,
224 : errno1, strerror(errno1),
225 : errno2, strerror(errno2)));
226 : }
227 :
228 2046528 : torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
229 :
230 2046528 : torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
231 :
232 2046528 : if (len1 != len2 ||
233 2046528 : memcmp(buf1, buf2, len1) != 0) {
234 0 : torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
235 0 : show_buf(" IN1:", inbuf, size-size_in1);
236 0 : show_buf(" IN2:", inbuf, size-size_in2);
237 0 : show_buf("OUT1:", buf1, len1);
238 0 : show_buf("OUT2:", buf2, len2);
239 0 : if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
240 0 : torture_comment(test, "next codepoint is %u",
241 : get_codepoint((char *)(buf2+len1), len2-len1, charset));
242 : }
243 0 : if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
244 0 : torture_comment(test, "next codepoint is %u",
245 : get_codepoint((char *)(buf1+len2),len1-len2, charset));
246 : }
247 :
248 0 : torture_fail(test, "failed");
249 : }
250 :
251 : /* convert back to UTF-16, putting result in buf3 */
252 2046528 : size = size - size_in1;
253 2046528 : ptr_in1 = (const char *)buf1;
254 2046528 : ptr_out = (char *)buf3;
255 2046528 : size_in3 = len1;
256 2046528 : outsize3 = sizeof(buf3);
257 :
258 2046528 : memset(ptr_out, 0, outsize3);
259 2046528 : ret3 = smb_iconv(cd3, &ptr_in1, &size_in3, &ptr_out, &outsize3);
260 :
261 : /* we only internally support the first 1M codepoints */
262 2046529 : if (outsize3 != sizeof(buf3) - size &&
263 1 : get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
264 1 : size - (sizeof(buf3) - outsize3),
265 : "UTF-16LE") >= (1<<20)) {
266 0 : return true;
267 : }
268 :
269 2046528 : torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
270 : "pull failed - %s", strerror(errno)));
271 :
272 2046528 : if (strncmp(charset, "UTF", 3) != 0) {
273 : /* don't expect perfect mappings for non UTF charsets */
274 0 : return true;
275 : }
276 :
277 :
278 1546528 : torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
279 : "wrong outsize3");
280 :
281 1546528 : if (memcmp(buf3, inbuf, size) != 0) {
282 0 : torture_comment(test, "pull bytes mismatch:");
283 0 : show_buf("inbuf", inbuf, size);
284 0 : show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
285 0 : torture_comment(test, "next codepoint is %u\n",
286 0 : get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
287 0 : size - (sizeof(buf3) - outsize3),
288 : "UTF-16LE"));
289 0 : torture_fail(test, "");
290 : }
291 :
292 0 : return true;
293 : }
294 :
295 :
296 : /*
297 : test the push_codepoint() and next_codepoint() functions for a given
298 : codepoint
299 : */
300 1048576 : static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
301 : {
302 1048576 : uint8_t buf[10];
303 1048576 : size_t size, size2;
304 1048576 : codepoint_t c;
305 :
306 1048576 : size = push_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, codepoint);
307 1048576 : torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
308 : "Invalid Codepoint range");
309 :
310 1048576 : if (size == -1) return true;
311 :
312 1046528 : buf[size] = random();
313 1046528 : buf[size+1] = random();
314 1046528 : buf[size+2] = random();
315 1046528 : buf[size+3] = random();
316 :
317 1046528 : c = next_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, &size2);
318 :
319 1046528 : torture_assert(tctx, c == codepoint,
320 : talloc_asprintf(tctx,
321 : "next_codepoint(%u) failed - gave %u", codepoint, c));
322 :
323 1046528 : torture_assert(tctx, size2 == size,
324 : talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
325 : codepoint, (int)size2, (int)size));
326 :
327 0 : return true;
328 : }
329 :
/*
  run test_codepoint() over every codepoint from 0 up to (but not
  including) 1<<20, stopping at the first failure

  Returns true straight away when iconv_untestable() reports that the
  system iconv library cannot be used.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	unsigned int cp = 0;

	if (iconv_untestable(tctx)) {
		return true;
	}

	while (cp < (1<<20)) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
342 :
/*
  compare the system and built-in conversions to UTF-8 for every
  codepoint from 0 up to (but not including) 1<<20

  Codepoints that gen_codepoint_utf16() cannot encode are skipped. When
  the "progress" setting is enabled a progress line is emitted every
  1000 codepoints.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned char utf16[1000];
	size_t utf16_len;
	unsigned int cp;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (cp = 0; cp < (1<<20); cp++) {
		if (gen_codepoint_utf16(cp, (char *)utf16, &utf16_len) != 0) {
			/* not representable by the system iconv */
			continue;
		}

		if (cp % 1000 == 0 &&
		    torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "codepoint=%u \r", cp);
			fflush(stdout);
		}

		if (!test_buffer(tctx, utf16, utf16_len, "UTF-8")) {
			return false;
		}
	}

	return true;
}
369 :
/*
  feed random byte sequences through test_buffer() for both UTF-8 and
  CP850

  Each of the 500000 iterations (the loop bound, despite the "5m" in the
  name) builds a buffer of 0..99 bytes. Every byte is below 128 about
  80% of the time and fully random otherwise; independently, roughly one
  byte in ten has 0xd8 OR-ed in and one in ten has 0xdc OR-ed in,
  presumably to provoke surrogate-like UTF-16 units.

  Returns false on the first buffer that test_buffer() rejects.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx))
		return true;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned: print it with %u, not %d */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
415 :
416 :
417 2 : static bool test_string2key(struct torture_context *tctx)
418 : {
419 2 : uint16_t *buf;
420 2 : char *dest = NULL;
421 2 : TALLOC_CTX *mem_ctx = talloc_new(tctx);
422 2 : size_t len = (random()%1000)+1;
423 2 : const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
424 2 : uint8_t le1[20];
425 2 : uint8_t *munged1;
426 2 : uint8_t *out1;
427 2 : size_t ret;
428 2 : int i;
429 2 : const char *correct = "a\357\277\275b\357\277\275c\001defg";
430 :
431 2 : buf = talloc_size(mem_ctx, len*2);
432 2 : generate_random_buffer((uint8_t *)buf, len*2);
433 :
434 2 : torture_comment(tctx, "converting random buffer\n");
435 :
436 2 : if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret)) {
437 0 : torture_fail(tctx, "Failed to convert random buffer\n");
438 : }
439 :
440 22 : for (i=0;i<10;i++) {
441 20 : SSVAL(&le1[2*i], 0, in1[i]);
442 : }
443 :
444 2 : torture_comment(tctx, "converting fixed buffer to UTF16\n");
445 :
446 2 : if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret)) {
447 0 : torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
448 : }
449 :
450 2 : torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
451 :
452 2 : torture_comment(tctx, "converting fixed buffer to UTF8\n");
453 :
454 2 : if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret)) {
455 0 : torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
456 : }
457 :
458 2 : torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
459 : "conversion gave incorrect result\n");
460 :
461 2 : talloc_free(mem_ctx);
462 :
463 2 : return true;
464 : }
465 :
466 2354 : struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
467 : {
468 2354 : struct torture_suite *suite = torture_suite_create(mem_ctx, "iconv");
469 :
470 2354 : torture_suite_add_simple_test(suite, "string2key",
471 : test_string2key);
472 :
473 2354 : torture_suite_add_simple_test(suite, "next_codepoint()",
474 : test_next_codepoint);
475 :
476 2354 : torture_suite_add_simple_test(suite, "first 1M codepoints",
477 : test_first_1m);
478 :
479 2354 : torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
480 : test_random_5m);
481 :
482 2354 : torture_suite_add_simple_test(suite, "string2key",
483 : test_string2key);
484 2354 : return suite;
485 : }
486 :
487 : #else
488 :
489 : struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
490 : {
491 : printf("No native iconv library - can't run iconv test\n");
492 : return NULL;
493 : }
494 :
495 : #endif
|