LCOV - code coverage report
Current view: top level - lib/util/charset/tests - iconv.c (source / functions) Hit Total Coverage
Test: coverage report for master 2f515e9b Lines: 190 254 74.8 %
Date: 2024-04-21 15:09:00 Functions: 10 11 90.9 %

          Line data    Source code
       1             : /* 
       2             :    Unix SMB/CIFS implementation.
       3             : 
       4             :    local testing of iconv routines. This tests the system iconv code against
       5             :    the built-in iconv code
       6             : 
       7             :    Copyright (C) Andrew Tridgell 2004
       8             :    
       9             :    This program is free software; you can redistribute it and/or modify
      10             :    it under the terms of the GNU General Public License as published by
      11             :    the Free Software Foundation; either version 3 of the License, or
      12             :    (at your option) any later version.
      13             :    
      14             :    This program is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU General Public License for more details.
      18             :    
      19             :    You should have received a copy of the GNU General Public License
      20             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      21             : */
      22             : 
      23             : #include "includes.h"
      24             : #include "torture/torture.h"
      25             : #include "system/iconv.h"
      26             : #include "system/time.h"
      27             : #include "libcli/raw/libcliraw.h"
      28             : #include "param/param.h"
      29             : #include "torture/util.h"
      30             : #include "torture/local/proto.h"
      31             : #include "talloc.h"
      32             : 
      33             : #ifdef HAVE_NATIVE_ICONV
      34             : 
      35           3 : static bool iconv_untestable(struct torture_context *tctx)
      36             : {
      37           3 :         iconv_t cd;
      38             : 
      39           3 :         cd = iconv_open("UTF-16LE", "UCS-4LE");
      40           3 :         if (cd == (iconv_t)-1)
      41           0 :                 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
      42           3 :         iconv_close(cd);
      43             : 
      44           3 :         cd = iconv_open("UTF-16LE", "CP850");
      45           3 :         if (cd == (iconv_t)-1)
      46           0 :                 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
      47           3 :         iconv_close(cd);
      48             : 
      49           3 :         return false;
      50             : }
      51             : 
      52             : /*
      53             :   generate a UTF-16LE buffer for a given unicode codepoint
      54             : */
      55     1048576 : static int gen_codepoint_utf16(unsigned int codepoint,
      56             :                                char *buf, size_t *size)
      57             : {
      58     1048576 :         static iconv_t cd;
      59     1048576 :         uint8_t in[4];
      60     1048576 :         char *ptr_in;
      61     1048576 :         size_t size_in, size_out, ret;
      62     1048576 :         if (!cd) {
      63           1 :                 cd = iconv_open("UTF-16LE", "UCS-4LE");
      64           1 :                 if (cd == (iconv_t)-1) {
      65           0 :                         cd = NULL;
      66           0 :                         return -1;
      67             :                 }
      68             :         }
      69             : 
      70     1048576 :         in[0] = codepoint & 0xFF;
      71     1048576 :         in[1] = (codepoint>>8) & 0xFF;
      72     1048576 :         in[2] = (codepoint>>16) & 0xFF;
      73     1048576 :         in[3] = (codepoint>>24) & 0xFF;
      74             : 
      75     1048576 :         ptr_in = (char *)in;
      76     1048576 :         size_in = 4;
      77     1048576 :         size_out = 8;
      78             : 
      79     1048576 :         ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
      80             : 
      81     1048576 :         *size = 8 - size_out;
      82             : 
      83     1048576 :         return ret;
      84             : }
      85             : 
      86             : 
      87             : /*
      88             :   work out the unicode codepoint of the first UTF-8 character in the buffer
      89             : */
      90           1 : static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
      91             : {
      92           1 :         iconv_t cd;
      93           1 :         uint8_t out[4];
      94           1 :         char *ptr_out;
      95           1 :         size_t size_out, size_in, ret;
      96             : 
      97           1 :         cd = iconv_open("UCS-4LE", charset);
      98             : 
      99           1 :         size_in = size;
     100           1 :         ptr_out = (char *)out;
     101           1 :         size_out = sizeof(out);
     102           1 :         memset(out, 0, sizeof(out));
     103             : 
     104           1 :         ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
     105           1 :         iconv_close(cd);
     106           1 :         if (ret == (size_t) -1) {
     107           0 :                 return (unsigned int)-1;
     108             :         }
     109             : 
     110           1 :         return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
     111             : }
     112             : 
     113             : /*
     114             :   display a buffer with name prefix
     115             : */
     116           0 : static void show_buf(const char *name, uint8_t *buf, size_t size)
     117             : {
     118           0 :         int i;
     119           0 :         printf("%s ", name);
     120           0 :         for (i=0;i<size;i++) {
     121           0 :                 printf("%02x ", buf[i]);
     122             :         }
     123           0 :         printf("\n");
     124           0 : }
     125             : 
     126             : /*
     127             :   given a UTF-16LE buffer, test the system and built-in iconv code to
     128             :   make sure they do exactly the same thing in converting the buffer to
     129             :   "charset", then convert it back again and ensure we get the same
     130             :   buffer back
     131             : */
     132     2046528 : static bool test_buffer(struct torture_context *test, 
     133             :                         uint8_t *inbuf, size_t size, const char *charset)
     134             : {
     135     2046528 :         uint8_t buf1[1000], buf2[1000], buf3[1000];
     136     2046528 :         size_t outsize1, outsize2, outsize3;
     137     2046528 :         const char *ptr_in1;
     138     2046528 :         char *ptr_in2;
     139     2046528 :         char *ptr_out;
     140     2046528 :         size_t size_in1, size_in2, size_in3;
     141     2046528 :         size_t ret1, ret2, ret3, len1, len2;
     142     2046528 :         int errno1, errno2;
     143     2046528 :         static iconv_t cd;
     144     2046528 :         static smb_iconv_t cd2, cd3;
     145     2046528 :         static const char *last_charset;
     146             : 
     147     2046528 :         if (cd && last_charset) {
     148     2046527 :                 iconv_close(cd);
     149     2046527 :                 smb_iconv_close(cd2);
     150     2046527 :                 smb_iconv_close(cd3);
     151     2046527 :                 cd = NULL;
     152             :         }
     153             : 
     154     2046528 :         if (!cd) {
     155     2046528 :                 cd = iconv_open(charset, "UTF-16LE");
     156     2046528 :                 if (cd == (iconv_t)-1) {
     157           0 :                         torture_fail(test, 
     158             :                                      talloc_asprintf(test, 
     159             :                                                      "failed to open %s to UTF-16LE",
     160             :                                                      charset));
     161             :                 }
     162     2046528 :                 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true));
     163     2046528 :                 if (cd2 == (iconv_t)-1) {
     164           0 :                         torture_fail(test, 
     165             :                                      talloc_asprintf(test, 
     166             :                                                      "failed to open %s to UTF-16LE via smb_iconv_open_ex",
     167             :                                                      charset));
     168             :                 }
     169     2046528 :                 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true));
     170     2046528 :                 if (cd3 == (iconv_t)-1) {
     171           0 :                         torture_fail(test, 
     172             :                                      talloc_asprintf(test, 
     173             :                                                      "failed to open UTF-16LE to %s via smb_iconv_open_ex",
     174             :                                                      charset));
     175             :                 }
     176     2046528 :                 last_charset = charset;
     177             :         }
     178             : 
     179             :         /* internal convert to charset - placing result in buf1 */
     180     2046528 :         ptr_in1 = (const char *)inbuf;
     181     2046528 :         ptr_out = (char *)buf1;
     182     2046528 :         size_in1 = size;
     183     2046528 :         outsize1 = sizeof(buf1);
     184             : 
     185     2046528 :         memset(ptr_out, 0, outsize1);
     186     2046528 :         errno = 0;
     187     2046528 :         ret1 = smb_iconv(cd2, &ptr_in1, &size_in1, &ptr_out, &outsize1);
     188     2046528 :         errno1 = errno;
     189             : 
     190             :         /* system convert to charset - placing result in buf2 */
     191     2046528 :         ptr_in2 = (char *)inbuf;
     192     2046528 :         ptr_out = (char *)buf2;
     193     2046528 :         size_in2 = size;
     194     2046528 :         outsize2 = sizeof(buf2);
     195             :         
     196     2046528 :         memset(ptr_out, 0, outsize2);
     197     2046528 :         errno = 0;
     198     2046528 :         ret2 = iconv(cd, &ptr_in2, &size_in2, &ptr_out, &outsize2);
     199     2046528 :         errno2 = errno;
     200             : 
     201     2046528 :         len1 = sizeof(buf1) - outsize1;
     202     2046528 :         len2 = sizeof(buf2) - outsize2;
     203             : 
     204             :         /* codepoints above 1M are not interesting for now */
     205     2046528 :         if (len2 > len1 && 
     206           0 :             memcmp(buf1, buf2, len1) == 0 && 
     207           0 :             get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
     208           0 :                 return true;
     209             :         }
     210     2046528 :         if (len1 > len2 && 
     211           0 :             memcmp(buf1, buf2, len2) == 0 && 
     212           0 :             get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
     213           0 :                 return true;
     214             :         }
     215             : 
     216     2046528 :         torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
     217             : 
     218     2046528 :         if (errno1 != errno2) {
     219           0 :                 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
     220           0 :                 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
     221           0 :                 torture_fail(test, talloc_asprintf(test, 
     222             :                                                    "errno mismatch with %s internal=%d/%s system=%d/%s", 
     223             :                                                    charset, 
     224             :                                                    errno1, strerror(errno1), 
     225             :                                                    errno2, strerror(errno2)));
     226             :         }
     227             :         
     228     2046528 :         torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
     229             :         
     230     2046528 :         torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
     231             : 
     232     2046528 :         if (len1 != len2 ||
     233     2046528 :             memcmp(buf1, buf2, len1) != 0) {
     234           0 :                 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
     235           0 :                 show_buf(" IN1:", inbuf, size-size_in1);
     236           0 :                 show_buf(" IN2:", inbuf, size-size_in2);
     237           0 :                 show_buf("OUT1:", buf1, len1);
     238           0 :                 show_buf("OUT2:", buf2, len2);
     239           0 :                 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
     240           0 :                         torture_comment(test, "next codepoint is %u", 
     241             :                                get_codepoint((char *)(buf2+len1), len2-len1, charset));
     242             :                 }
     243           0 :                 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
     244           0 :                         torture_comment(test, "next codepoint is %u", 
     245             :                                get_codepoint((char *)(buf1+len2),len1-len2, charset));
     246             :                 }
     247             : 
     248           0 :                 torture_fail(test, "failed");
     249             :         }
     250             : 
     251             :         /* convert back to UTF-16, putting result in buf3 */
     252     2046528 :         size = size - size_in1;
     253     2046528 :         ptr_in1 = (const char *)buf1;
     254     2046528 :         ptr_out = (char *)buf3;
     255     2046528 :         size_in3 = len1;
     256     2046528 :         outsize3 = sizeof(buf3);
     257             : 
     258     2046528 :         memset(ptr_out, 0, outsize3);
     259     2046528 :         ret3 = smb_iconv(cd3, &ptr_in1, &size_in3, &ptr_out, &outsize3);
     260             : 
     261             :         /* we only internally support the first 1M codepoints */
     262     2046529 :         if (outsize3 != sizeof(buf3) - size &&
     263           1 :             get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), 
     264           1 :                           size - (sizeof(buf3) - outsize3),
     265             :                           "UTF-16LE") >= (1<<20)) {
     266           0 :                 return true;
     267             :         }
     268             : 
     269     2046528 :         torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test, 
     270             :                                                                 "pull failed - %s", strerror(errno)));
     271             : 
     272     2046528 :         if (strncmp(charset, "UTF", 3) != 0) {
     273             :                 /* don't expect perfect mappings for non UTF charsets */
     274           0 :                 return true;
     275             :         }
     276             : 
     277             : 
     278     1546528 :         torture_assert_int_equal(test, outsize3, sizeof(buf3) - size, 
     279             :                 "wrong outsize3");
     280             :         
     281     1546528 :         if (memcmp(buf3, inbuf, size) != 0) {
     282           0 :                 torture_comment(test, "pull bytes mismatch:");
     283           0 :                 show_buf("inbuf", inbuf, size);
     284           0 :                 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
     285           0 :                 torture_comment(test, "next codepoint is %u\n", 
     286           0 :                        get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), 
     287           0 :                                      size - (sizeof(buf3) - outsize3),
     288             :                                      "UTF-16LE"));
     289           0 :                 torture_fail(test, "");
     290             :         }
     291             : 
     292           0 :         return true;
     293             : }
     294             : 
     295             : 
     296             : /*
     297             :   test the push_codepoint() and next_codepoint() functions for a given
     298             :   codepoint
     299             : */
     300     1048576 : static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
     301             : {
     302     1048576 :         uint8_t buf[10];
     303     1048576 :         size_t size, size2;
     304     1048576 :         codepoint_t c;
     305             : 
     306     1048576 :         size = push_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, codepoint);
     307     1048576 :         torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000), 
     308             :                        "Invalid Codepoint range");
     309             : 
     310     1048576 :         if (size == -1) return true;
     311             : 
     312     1046528 :         buf[size] = random();
     313     1046528 :         buf[size+1] = random();
     314     1046528 :         buf[size+2] = random();
     315     1046528 :         buf[size+3] = random();
     316             : 
     317     1046528 :         c = next_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, &size2);
     318             : 
     319     1046528 :         torture_assert(tctx, c == codepoint, 
     320             :                        talloc_asprintf(tctx, 
     321             :                                        "next_codepoint(%u) failed - gave %u", codepoint, c));
     322             : 
     323     1046528 :         torture_assert(tctx, size2 == size, 
     324             :                         talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n", 
     325             :                        codepoint, (int)size2, (int)size));
     326             : 
     327           0 :         return true;
     328             : }
     329             : 
     330           1 : static bool test_next_codepoint(struct torture_context *tctx)
     331             : {
     332           1 :         unsigned int codepoint;
     333           1 :         if (iconv_untestable(tctx))
     334           0 :                 return true;
     335             : 
     336     1048577 :         for (codepoint=0;codepoint<(1<<20);codepoint++) {
     337     1048576 :                 if (!test_codepoint(tctx, codepoint))
     338           0 :                         return false;
     339             :         }
     340           0 :         return true;
     341             : }
     342             : 
     343           1 : static bool test_first_1m(struct torture_context *tctx)
     344             : {
     345           1 :         unsigned int codepoint;
     346           1 :         size_t size;
     347           1 :         unsigned char inbuf[1000];
     348             : 
     349           1 :         if (iconv_untestable(tctx))
     350           0 :                 return true;
     351             : 
     352     1048577 :         for (codepoint=0;codepoint<(1<<20);codepoint++) {
     353     1048576 :                 if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
     354        2048 :                         continue;
     355             :                 }
     356             : 
     357     1046528 :                 if (codepoint % 1000 == 0) {
     358        1047 :                         if (torture_setting_bool(tctx, "progress", true)) {
     359           0 :                                 torture_comment(tctx, "codepoint=%u   \r", codepoint);
     360           0 :                                 fflush(stdout);
     361             :                         }
     362             :                 }
     363             : 
     364     1046528 :                 if (!test_buffer(tctx, inbuf, size, "UTF-8"))
     365           0 :                         return false;
     366             :         }
     367           0 :         return true;
     368             : }
     369             : 
     370           1 : static bool test_random_5m(struct torture_context *tctx)
     371             : {
     372           1 :         unsigned char inbuf[1000];
     373           1 :         unsigned int i;
     374             : 
     375           1 :         if (iconv_untestable(tctx))
     376           0 :                 return true;
     377             : 
     378      500001 :         for (i=0;i<500000;i++) {
     379      500000 :                 size_t size;
     380      500000 :                 unsigned int c;
     381             : 
     382      500000 :                 if (i % 1000 == 0) {
     383         500 :                         if (torture_setting_bool(tctx, "progress", true)) {
     384           0 :                                 torture_comment(tctx, "i=%u              \r", i);
     385           0 :                                 fflush(stdout);
     386             :                         }
     387             :                 }
     388             : 
     389      500000 :                 size = random() % 100;
     390    25264672 :                 for (c=0;c<size;c++) {
     391    24764672 :                         if (random() % 100 < 80) {
     392    19814139 :                                 inbuf[c] = random() % 128;
     393             :                         } else {
     394     4950533 :                                 inbuf[c] = random();
     395             :                         }
     396    24764672 :                         if (random() % 10 == 0) {
     397     2474454 :                                 inbuf[c] |= 0xd8;
     398             :                         }
     399    24764672 :                         if (random() % 10 == 0) {
     400     2475082 :                                 inbuf[c] |= 0xdc;
     401             :                         }
     402             :                 }
     403      500000 :                 if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
     404           0 :                         printf("i=%d failed UTF-8\n", i);
     405           0 :                         return false;
     406             :                 }
     407             : 
     408      500000 :                 if (!test_buffer(tctx, inbuf, size, "CP850")) {
     409           0 :                         printf("i=%d failed CP850\n", i);
     410           0 :                         return false;
     411             :                 }
     412             :         }
     413           0 :         return true;
     414             : }
     415             : 
     416             : 
     417           2 : static bool test_string2key(struct torture_context *tctx)
     418             : {
     419           2 :         uint16_t *buf;
     420           2 :         char *dest = NULL;
     421           2 :         TALLOC_CTX *mem_ctx = talloc_new(tctx);
     422           2 :         size_t len = (random()%1000)+1;
     423           2 :         const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
     424           2 :         uint8_t le1[20];
     425           2 :         uint8_t *munged1;
     426           2 :         uint8_t *out1;
     427           2 :         size_t ret;
     428           2 :         int i;
     429           2 :         const char *correct = "a\357\277\275b\357\277\275c\001defg";
     430             : 
     431           2 :         buf = talloc_size(mem_ctx, len*2);
     432           2 :         generate_random_buffer((uint8_t *)buf, len*2);
     433             : 
     434           2 :         torture_comment(tctx, "converting random buffer\n");
     435             : 
     436           2 :         if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret)) {
     437           0 :                 torture_fail(tctx, "Failed to convert random buffer\n");
     438             :         }
     439             : 
     440          22 :         for (i=0;i<10;i++) {
     441          20 :                 SSVAL(&le1[2*i], 0, in1[i]);
     442             :         }
     443             : 
     444           2 :         torture_comment(tctx, "converting fixed buffer to UTF16\n");
     445             : 
     446           2 :         if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret)) {
     447           0 :                 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
     448             :         }
     449             : 
     450           2 :         torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
     451             : 
     452           2 :         torture_comment(tctx, "converting fixed buffer to UTF8\n");
     453             : 
     454           2 :         if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret)) {
     455           0 :                 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
     456             :         }
     457             : 
     458           2 :         torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
     459             :                 "conversion gave incorrect result\n");
     460             : 
     461           2 :         talloc_free(mem_ctx);
     462             : 
     463           2 :         return true;
     464             : }
     465             : 
     466        2354 : struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
     467             : {
     468        2354 :         struct torture_suite *suite = torture_suite_create(mem_ctx, "iconv");
     469             : 
     470        2354 :         torture_suite_add_simple_test(suite, "string2key",
     471             :                                       test_string2key);
     472             : 
     473        2354 :         torture_suite_add_simple_test(suite, "next_codepoint()",
     474             :                                       test_next_codepoint);
     475             : 
     476        2354 :         torture_suite_add_simple_test(suite, "first 1M codepoints",
     477             :                                       test_first_1m);
     478             : 
     479        2354 :         torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
     480             :                                       test_random_5m);
     481             : 
     482        2354 :         torture_suite_add_simple_test(suite, "string2key",
     483             :                                       test_string2key);
     484        2354 :         return suite;
     485             : }
     486             : 
     487             : #else
     488             : 
     489             : struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx) 
     490             : {
     491             :         printf("No native iconv library - can't run iconv test\n");
     492             :         return NULL;
     493             : }
     494             : 
     495             : #endif

Generated by: LCOV version 1.14