LCOV - coverage report for master 2f515e9b

LCOV - code coverage report

Current view:	top level - lib/util/charset - iconv.c (source / functions)		Hit	Total	Coverage
Test:	coverage report for master 2f515e9b	Lines:	430	540	79.6 %
Date:	2024-04-21 15:09:00	Functions:	17	19	89.5 %

          Line data    Source code

       1             : /*
       2             :    Unix SMB/CIFS implementation.
       3             :    minimal iconv implementation
       4             :    Copyright (C) Andrew Tridgell 2001
       5             :    Copyright (C) Jelmer Vernooij 2002
       6             : 
       7             :    This program is free software; you can redistribute it and/or modify
       8             :    it under the terms of the GNU General Public License as published by
       9             :    the Free Software Foundation; either version 3 of the License, or
      10             :    (at your option) any later version.
      11             : 
      12             :    This program is distributed in the hope that it will be useful,
      13             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :    GNU General Public License for more details.
      16             : 
      17             :    You should have received a copy of the GNU General Public License
      18             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      19             : */
      20             : 
      21             : #include "replace.h"
      22             : #include "system/iconv.h"
      23             : #include "system/filesys.h"
      24             : #include "lib/util/byteorder.h"
      25             : #include "lib/util/dlinklist.h"
      26             : #include "lib/util/charset/charset.h"
      27             : #include "lib/util/charset/charset_proto.h"
      28             : 
      29             : #ifdef HAVE_ICU_I18N
      30             : #include <unicode/ustring.h>
      31             : #include <unicode/utrans.h>
      32             : #endif
      33             : 
      34             : #ifdef strcasecmp
      35             : #undef strcasecmp
      36             : #endif
      37             : 
      38             : /**
      39             :  * @file
      40             :  *
      41             :  * @brief Samba wrapper/stub for iconv character set conversion.
      42             :  *
      43             :  * iconv is the XPG2 interface for converting between character
      44             :  * encodings.  This file provides a Samba wrapper around it, and also
      45             :  * a simple reimplementation that is used if the system does not
      46             :  * implement iconv.
      47             :  *
      48             :  * Samba only works with encodings that are supersets of ASCII: ascii
      49             :  * characters like whitespace can be tested for directly, multibyte
      50             :  * sequences start with a byte with the high bit set, and strings are
      51             :  * terminated by a nul byte.
      52             :  *
      53             :  * Note that the only function provided by iconv is conversion between
      54             :  * characters.  It doesn't directly support operations like
      55             :  * uppercasing or comparison.  We have to convert to UTF-16LE and
      56             :  * compare there.
      57             :  *
      58             :  * @sa Samba Developers Guide
      59             :  **/
      60             : 
      61             : static size_t ascii_pull  (void *,const char **, size_t *, char **, size_t *);
      62             : static size_t ascii_push  (void *,const char **, size_t *, char **, size_t *);
      63             : static size_t latin1_pull(void *,const char **, size_t *, char **, size_t *);
      64             : static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
      65             : static size_t utf8_pull   (void *,const char **, size_t *, char **, size_t *);
      66             : static size_t utf8_push   (void *,const char **, size_t *, char **, size_t *);
      67             : static size_t utf16_munged_pull(void *,const char **, size_t *, char **, size_t *);
      68             : static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
      69             : static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
      70             : static size_t iconv_copy  (void *,const char **, size_t *, char **, size_t *);
      71             : static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
      72             : 
      73             : static const struct charset_functions builtin_functions[] = {
      74             :         /* windows is closest to UTF-16 */
      75             :         {
      76             :                 .name = "UCS-2LE",
      77             :                 .pull = iconv_copy,
      78             :                 .push = iconv_copy
      79             :         },
      80             :         {
      81             :                 .name = "UTF-16LE",
      82             :                 .pull = iconv_copy,
      83             :                 .push = iconv_copy
      84             :         },
      85             :         {
      86             :                 .name = "UCS-2BE",
      87             :                 .pull = iconv_swab,
      88             :                 .push = iconv_swab
      89             :         },
      90             :         {
      91             :                 .name = "UTF-16BE",
      92             :                 .pull = iconv_swab,
      93             :                 .push = iconv_swab
      94             :         },
      95             : 
      96             :         /* we include the UTF-8 alias to cope with differing locale settings */
      97             :         {
      98             :                 .name = "UTF8",
      99             :                 .pull = utf8_pull,
     100             :                 .push = utf8_push
     101             :         },
     102             :         {
     103             :                 .name = "UTF-8",
     104             :                 .pull = utf8_pull,
     105             :                 .push = utf8_push
     106             :         },
     107             : 
     108             :         /* this handles the munging needed for String2Key */
     109             :         {
     110             :                 .name = "UTF16_MUNGED",
     111             :                 .pull = utf16_munged_pull,
     112             :                 .push = iconv_copy,
     113             :                 .samba_internal_charset = true
     114             :         },
     115             : 
     116             :         {
     117             :                 .name = "ASCII",
     118             :                 .pull = ascii_pull,
     119             :                 .push = ascii_push
     120             :         },
     121             :         {
     122             :                 .name = "646",
     123             :                 .pull = ascii_pull,
     124             :                 .push = ascii_push
     125             :         },
     126             :         {
     127             :                 .name = "ISO-8859-1",
     128             :                 .pull = latin1_pull,
     129             :                 .push = latin1_push
     130             :         },
     131             : #ifdef DEVELOPER
     132             :         {
     133             :                 .name = "WEIRD",
     134             :                 .pull = weird_pull,
     135             :                 .push = weird_push,
     136             :                 .samba_internal_charset = true
     137             :         },
     138             : #endif
     139             : #ifdef DARWINOS
     140             :         {
     141             :                 .name = "MACOSXFS",
     142             :                 .pull = macosxfs_encoding_pull,
     143             :                 .push = macosxfs_encoding_push,
     144             :                 .samba_internal_charset = true
     145             :         },
     146             : #endif
     147             :         {
     148             :                 .name = "UCS2-HEX",
     149             :                 .pull = ucs2hex_pull,
     150             :                 .push = ucs2hex_push,
     151             :                 .samba_internal_charset = true
     152             :         }
     153             : };
     154             : 
     155             : #ifdef HAVE_NATIVE_ICONV
     156             : /* if there was an error then reset the internal state,
     157             :    this ensures that we don't have a shift state remaining for
     158             :    character sets like SJIS */
     159     5274872 : static size_t sys_iconv(void *cd,
     160             :                         const char **inbuf, size_t *inbytesleft,
     161             :                         char **outbuf, size_t *outbytesleft)
     162             : {
     163     5274872 :         size_t ret = iconv((iconv_t)cd,
     164             :                            discard_const_p(char *, inbuf), inbytesleft,
     165             :                            outbuf, outbytesleft);
     166     5274872 :         if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
     167     5274872 :         return ret;
     168             : }
     169             : #endif
     170             : 
     171             : #ifdef HAVE_ICU_I18N
     172          74 : static size_t sys_uconv(void *cd,
     173             :                         const char **inbuf,
     174             :                         size_t *inbytesleft,
     175             :                         char **outbuf,
     176             :                         size_t *outbytesleft)
     177          74 : {
     178          74 :         UTransliterator *t = (UTransliterator *)cd;
     179          74 :         size_t bufsize = *inbytesleft * 2;
     180          74 :         UChar ustr[bufsize];
     181          74 :         UChar *up = NULL;
     182          74 :         char *p = NULL;
     183          14 :         int32_t ustrlen;
     184          14 :         int32_t limit;
     185          14 :         int32_t converted_len;
     186          14 :         size_t inbuf_consumed;
     187          14 :         size_t outbut_consumed;
     188          14 :         UErrorCode ue;
     189             : 
     190             :         /* Convert from UTF8 to UCS2 */
     191          74 :         ue = 0;
     192          74 :         up = u_strFromUTF8(ustr,           /* dst */
     193             :                            bufsize,        /* dst buflen */
     194             :                            &converted_len, /* dst written */
     195             :                            *inbuf,         /* src */
     196          60 :                            *inbytesleft,   /* src length */
     197             :                            &ue);
     198          74 :         if (up == NULL || U_FAILURE(ue)) {
     199           0 :                 return -1;
     200             :         }
     201          74 :         if (converted_len > bufsize) {
     202             :                 /*
     203             :                  * u_strFromUTF8() returns the required size in
     204             :                  * converted_len. In theory this should never overflow as the
     205             :                  * ustr[] array is allocated with a size twice as big as
     206             :                  * inbytesleft and converted_len should be equal to inbytesleft,
     207             :                  * but you never know...
     208             :                  */
     209           0 :                 errno = EOVERFLOW;
     210           0 :                 return -1;
     211             :         }
     212          74 :         inbuf_consumed = converted_len;
     213             : 
     214             :         /*
     215             :          * The following transliteration function takes two parameters, the
     216             :          * length of the text to be converted (converted_len) and a limit which
     217             :          * may be smaller than converted_len. We just set limit to converted_len
     218             :          * and also ignore the value returned in limit.
     219             :          */
     220          74 :         limit = converted_len;
     221             : 
     222             :         /* Inplace transliteration */
     223          74 :         utrans_transUChars(t,
     224             :                            ustr,           /* text */
     225             :                            &converted_len, /* text length */
     226             :                            bufsize,        /* text buflen */
     227             :                            0,              /* start */
     228             :                            &limit,         /* limit */
     229             :                            &ue);
     230          74 :         if (U_FAILURE(ue)) {
     231           0 :                 return -1;
     232             :         }
     233          74 :         if (converted_len > bufsize) {
     234             :                 /*
     235             :                  * In theory this should never happen as the ustr[] array is
     236             :                  * allocated with a size twice as big as inbytesleft and
     237             :                  * converted_len should be equal to inbytesleft, but you never
     238             :                  * know...
     239             :                  */
     240           0 :                 errno = EOVERFLOW;
     241           0 :                 return -1;
     242             :         }
     243          74 :         ustrlen = converted_len;
     244             : 
     245             :         /* Convert from UCS2 back to UTF8 */
     246          74 :         ue = 0;
     247          88 :         p = u_strToUTF8(*outbuf,        /* dst */
     248          74 :                         *outbytesleft,  /* dst buflen */
     249             :                         &converted_len, /* dst required length */
     250             :                         ustr,           /* src */
     251             :                         ustrlen,        /* src length */
     252             :                         &ue);
     253          74 :         if (p == NULL || U_FAILURE(ue)) {
     254           0 :                 return -1;
     255             :         }
     256             : 
     257          68 :         outbut_consumed = converted_len;
     258          68 :         if (converted_len > *outbytesleft) {
     259             :                 /*
     260             :                  * The caller's result buffer is too small...
     261             :                 */
     262           0 :                 outbut_consumed = *outbytesleft;
     263             :         }
     264             : 
     265          68 :         *inbuf += inbuf_consumed;
     266          68 :         *inbytesleft -= inbuf_consumed;
     267          68 :         *outbuf += outbut_consumed;
     268          68 :         *outbytesleft -= outbut_consumed;
     269             : 
     270          68 :         return converted_len;
     271             : }
     272             : #endif
     273             : 
     274             : /**
     275             :  * This is a simple portable iconv() implementation.
     276             :  *
     277             :  * It only knows about a very small number of character sets - just
     278             :  * enough that Samba works on systems that don't have iconv.
     279             :  **/
     280   151093756 : _PUBLIC_ size_t smb_iconv(smb_iconv_t cd,
     281             :                  const char **inbuf, size_t *inbytesleft,
     282             :                  char **outbuf, size_t *outbytesleft)
     283             : {
     284             :         /* in many cases we can go direct */
     285   151093756 :         if (cd->direct) {
     286    38460624 :                 return cd->direct(cd->cd_direct,
     287             :                                   inbuf, inbytesleft, outbuf, outbytesleft);
     288             :         }
     289             : 
     290             :         /* otherwise we have to do it a chunk at a time */
     291             :         {
     292             : #ifndef SMB_ICONV_BUFSIZE
     293             : #define SMB_ICONV_BUFSIZE 2048
     294             : #endif
     295             :                 size_t bufsize;
     296             :                 char cvtbuf[SMB_ICONV_BUFSIZE];
     297             : 
     298   225504561 :                 while (*inbytesleft > 0) {
     299   112871471 :                         char *bufp1 = cvtbuf;
     300   112871471 :                         const char *bufp2 = cvtbuf;
     301   112871471 :                         int saved_errno = errno;
     302   112871471 :                         bool pull_failed = false;
     303   112871471 :                         bufsize = SMB_ICONV_BUFSIZE;
     304             : 
     305   112871471 :                         if (cd->pull(cd->cd_pull,
     306             :                                      inbuf, inbytesleft, &bufp1, &bufsize) == -1
     307      238349 :                             && errno != E2BIG) {
     308          10 :                                 saved_errno = errno;
     309          10 :                                 pull_failed = true;
     310             :                         }
     311             : 
     312   112871471 :                         bufsize = SMB_ICONV_BUFSIZE - bufsize;
     313             : 
     314   112871471 :                         if (cd->push(cd->cd_push,
     315             :                                      &bufp2, &bufsize,
     316             :                                      outbuf, outbytesleft) == -1) {
     317          42 :                                 return -1;
     318   112871439 :                         } else if (pull_failed) {
     319             :                                 /* We want the pull errno if possible */
     320          10 :                                 errno = saved_errno;
     321          10 :                                 return -1;
     322             :                         }
     323             :                 }
     324             :         }
     325             : 
     326   112633090 :         return 0;
     327             : }
     328             : 
     329     7424285 : static bool is_utf16(const char *name)
     330             : {
     331     8203103 :         return strcasecmp(name, "UCS-2LE") == 0 ||
     332     7424285 :                 strcasecmp(name, "UTF-16LE") == 0;
     333             : }
     334             : 
     335     4255252 : static int smb_iconv_t_destructor(smb_iconv_t hwd)
     336             : {
     337             : #ifdef HAVE_ICU_I18N
     338             :         /*
     339             :          * This has to come first, as the cd_direct member won't be an iconv
     340             :          * handle and must not be passed to iconv_close().
     341             :          */
     342     4255252 :         if (hwd->direct == sys_uconv) {
     343          40 :                 utrans_close(hwd->cd_direct);
     344          40 :                 return 0;
     345             :         }
     346             : #endif
     347             : #ifdef HAVE_NATIVE_ICONV
     348     4255212 :         if (hwd->cd_pull != NULL && hwd->cd_pull != (iconv_t)-1)
     349       28329 :                 iconv_close(hwd->cd_pull);
     350     4255212 :         if (hwd->cd_push != NULL && hwd->cd_push != (iconv_t)-1)
     351       11689 :                 iconv_close(hwd->cd_push);
     352     4255212 :         if (hwd->cd_direct != NULL && hwd->cd_direct != (iconv_t)-1)
     353     1000038 :                 iconv_close(hwd->cd_direct);
     354             : #endif
     355             : 
     356      161923 :         return 0;
     357             : }
     358             : 
     359     4387334 : _PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 
     360             :                               const char *fromcode, bool use_builtin_handlers)
     361             : {
     362     4096502 :         smb_iconv_t ret;
     363     4387334 :         const struct charset_functions *from=NULL, *to=NULL;
     364     4096502 :         int i;
     365             : 
     366     4387334 :         ret = (smb_iconv_t)talloc_named(mem_ctx,
     367             :                                         sizeof(*ret),
     368             :                                         "iconv(%s,%s)", tocode, fromcode);
     369     4387334 :         if (!ret) {
     370           0 :                 errno = ENOMEM;
     371           0 :                 return (smb_iconv_t)-1;
     372             :         }
     373     4387334 :         memset(ret, 0, sizeof(*ret));
     374     4387334 :         talloc_set_destructor(ret, smb_iconv_t_destructor);
     375             : 
     376             :         /* check for the simplest null conversion */
     377     4387334 :         if (strcmp(fromcode, tocode) == 0) {
     378         275 :                 ret->direct = iconv_copy;
     379         275 :                 return ret;
     380             :         }
     381             : 
     382             :         /* check if we have a builtin function for this conversion */
     383    57031767 :         for (i=0;i<ARRAY_SIZE(builtin_functions);i++) {
     384    52644708 :                 if (strcasecmp(fromcode, builtin_functions[i].name) == 0) {
     385     3831680 :                         if (use_builtin_handlers || builtin_functions[i].samba_internal_charset) {
     386     3831652 :                                 from = &builtin_functions[i];
     387             :                         }
     388             :                 }
     389    52644708 :                 if (strcasecmp(tocode, builtin_functions[i].name) == 0) {
     390     3862929 :                         if (use_builtin_handlers || builtin_functions[i].samba_internal_charset) {
     391     3862908 :                                 to = &builtin_functions[i];
     392             :                         }
     393             :                 }
     394             :         }
     395             : 
     396             : #ifdef HAVE_NATIVE_ICONV
     397             :         /* the from and to variables indicate a samba module or
     398             :          * internal conversion, ret->pull and ret->push are
     399             :          * initialised only in this block for iconv based
     400             :          * conversions */
     401             : 
     402     4387059 :         if (from == NULL) {
     403      555407 :                 ret->cd_pull = iconv_open("UTF-16LE", fromcode);
     404      555407 :                 if (ret->cd_pull == (iconv_t)-1)
     405          46 :                         ret->cd_pull = iconv_open("UCS-2LE", fromcode);
     406      555407 :                 if (ret->cd_pull != (iconv_t)-1) {
     407      555361 :                         ret->pull = sys_iconv;
     408             :                 }
     409             :         }
     410             : 
     411     4387059 :         if (to == NULL) {
     412      524151 :                 ret->cd_push = iconv_open(tocode, "UTF-16LE");
     413      524151 :                 if (ret->cd_push == (iconv_t)-1)
     414          46 :                         ret->cd_push = iconv_open(tocode, "UCS-2LE");
     415      524151 :                 if (ret->cd_push != (iconv_t)-1) {
     416      524105 :                         ret->push = sys_iconv;
     417             :                 }
     418             :         }
     419             : #endif
     420             : 
     421             : #ifdef HAVE_ICU_I18N
     422     4387059 :         if (strcasecmp(fromcode, "UTF8-NFD") == 0 &&
     423          22 :             strcasecmp(tocode, "UTF8-NFC") == 0)
     424             :         {
     425           2 :                 U_STRING_DECL(t, "any-nfc", 7);
     426          22 :                 UErrorCode ue = 0;
     427             : 
     428           2 :                 U_STRING_INIT(t, "any-nfc", 7);
     429             : 
     430          22 :                 ret->cd_direct = utrans_openU(t,
     431             :                                               strlen("any-nfc"),
     432             :                                               UTRANS_FORWARD,
     433             :                                               NULL,
     434             :                                               0,
     435             :                                               NULL,
     436             :                                               &ue);
     437          22 :                 if (U_FAILURE(ue)) {
     438           0 :                         return (smb_iconv_t)-1;
     439             :                 }
     440          22 :                 ret->direct = sys_uconv;
     441          22 :                 return ret;
     442             :         }
     443             : 
     444     4387037 :         if (strcasecmp(fromcode, "UTF8-NFC") == 0 &&
     445          24 :             strcasecmp(tocode, "UTF8-NFD") == 0)
     446             :         {
     447           4 :                 U_STRING_DECL(tname, "any-nfd", 7);
     448          24 :                 UErrorCode ue = 0;
     449             : 
     450           4 :                 U_STRING_INIT(tname, "any-nfd", 7);
     451             : 
     452          24 :                 ret->cd_direct = utrans_openU(tname,
     453             :                                               7,
     454             :                                               UTRANS_FORWARD,
     455             :                                               NULL,
     456             :                                               0,
     457             :                                               NULL,
     458             :                                               &ue);
     459          24 :                 if (U_FAILURE(ue)) {
     460           0 :                         return (smb_iconv_t)-1;
     461             :                 }
     462          24 :                 ret->direct = sys_uconv;
     463          24 :                 return ret;
     464             :         }
     465             : #endif
     466             : 
     467     4387013 :         if (ret->pull == NULL && from == NULL) {
     468           0 :                 goto failed;
     469             :         }
     470             : 
     471     4387013 :         if (ret->push == NULL && to == NULL) {
     472           0 :                 goto failed;
     473             :         }
     474             : 
     475             :         /* check for conversion to/from ucs2 */
     476     4387013 :         if (is_utf16(fromcode) && to) {
     477     1634227 :                 ret->direct = to->push;
     478     1634227 :                 return ret;
     479             :         }
     480     2752786 :         if (is_utf16(tocode) && from) {
     481     1608307 :                 ret->direct = from->pull;
     482     1608307 :                 return ret;
     483             :         }
     484             : 
     485             : #ifdef HAVE_NATIVE_ICONV
     486     1144479 :         if (is_utf16(fromcode)) {
     487      500024 :                 ret->direct = sys_iconv;
     488      500024 :                 ret->cd_direct = ret->cd_push;
     489      500024 :                 ret->cd_push = NULL;
     490      500024 :                 return ret;
     491             :         }
     492      644455 :         if (is_utf16(tocode)) {
     493      500014 :                 ret->direct = sys_iconv;
     494      500014 :                 ret->cd_direct = ret->cd_pull;
     495      500014 :                 ret->cd_pull = NULL;
     496      500014 :                 return ret;
     497             :         }
     498             : #endif
     499             : 
     500             :         /* the general case has to go via a buffer */
     501      144441 :         if (!ret->pull) ret->pull = from->pull;
     502      144441 :         if (!ret->push) ret->push = to->push;
     503      142237 :         return ret;
     504             : 
     505           0 : failed:
     506           0 :         talloc_free(ret);
     507           0 :         errno = EINVAL;
     508           0 :         return (smb_iconv_t)-1;
     509             : }
     510             : 
     511             : /*
     512             :   simple iconv_open() wrapper
     513             :  */
     514           6 : _PUBLIC_ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
     515             : {
     516           6 :         return smb_iconv_open_ex(NULL, tocode, fromcode, true);
     517             : }
     518             : 
     519             : /*
     520             :   simple iconv_close() wrapper
     521             : */
     522     4255210 : _PUBLIC_ int smb_iconv_close(smb_iconv_t cd)
     523             : {
     524     4255210 :         talloc_free(cd);
     525     4255210 :         return 0;
     526             : }
     527             : 
     528             : 
     529             : /**********************************************************************
     530             :  the following functions implement the builtin character sets in Samba
     531             :  and also the "test" character sets that are designed to test
     532             :  multi-byte character set support for English users
     533             : ***********************************************************************/
     534             : 
     535             : /*
     536             :   this takes an ASCII sequence and produces a UTF16 sequence
     537             : 
     538             :   The 128 codepoints of ASCII match the first 128 codepoints
     539             :   of unicode, and so can be put into the first byte of UTF16LE
     540             : 
     541             :  */
     542             : 
     543           5 : static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
     544             :                          char **outbuf, size_t *outbytesleft)
     545             : {
     546          26 :         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
     547          25 :                 if (((*inbuf)[0] & 0x7F) != (*inbuf)[0]) {
     548             :                         /* If this is multi-byte, then it isn't legal ASCII */
     549           4 :                         errno = EILSEQ;
     550           4 :                         return -1;
     551             :                 }
     552          21 :                 (*outbuf)[0] = (*inbuf)[0];
     553          21 :                 (*outbuf)[1] = 0;
     554          21 :                 (*inbytesleft)  -= 1;
     555          21 :                 (*outbytesleft) -= 2;
     556          21 :                 (*inbuf)  += 1;
     557          21 :                 (*outbuf) += 2;
     558             :         }
     559             : 
     560           1 :         if (*inbytesleft > 0) {
     561           0 :                 errno = E2BIG;
     562           0 :                 return -1;
     563             :         }
     564             : 
     565           0 :         return 0;
     566             : }
     567             : 
     568             : /*
     569             :   this takes a UTF16 sequence and produces an ASCII sequence
     570             : 
     571             :   The first 127 codepoints of ASCII matches the first 127 codepoints
     572             :   of unicode, and so can be read directly from the first byte of UTF16LE
     573             : 
     574             :  */
     575           4 : static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
     576             :                          char **outbuf, size_t *outbytesleft)
     577             : {
     578           4 :         int ir_count=0;
     579             : 
     580           5 :         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
     581           4 :                 if (((*inbuf)[0] & 0x7F) != (*inbuf)[0] ||
     582           1 :                         (*inbuf)[1] != 0) {
     583             :                         /* If this is multi-byte, then it isn't legal ASCII */
     584           3 :                         errno = EILSEQ;
     585           3 :                         return -1;
     586             :                 }
     587           1 :                 (*outbuf)[0] = (*inbuf)[0];
     588           1 :                 (*inbytesleft)  -= 2;
     589           1 :                 (*outbytesleft) -= 1;
     590           1 :                 (*inbuf)  += 2;
     591           1 :                 (*outbuf) += 1;
     592             :         }
     593             : 
     594           1 :         if (*inbytesleft == 1) {
     595           0 :                 errno = EINVAL;
     596           0 :                 return -1;
     597             :         }
     598             : 
     599           1 :         if (*inbytesleft > 1) {
     600           0 :                 errno = E2BIG;
     601           0 :                 return -1;
     602             :         }
     603             : 
     604           0 :         return ir_count;
     605             : }
     606             : 
     607             : /*
     608             :   this takes a latin1/ISO-8859-1 sequence and produces a UTF16 sequence
     609             : 
     610             :   The first 256 codepoints of latin1 matches the first 256 codepoints
     611             :   of unicode, and so can be put into the first byte of UTF16LE
     612             : 
     613             :  */
     614         151 : static size_t latin1_pull(void *cd, const char **inbuf, size_t *inbytesleft,
     615             :                           char **outbuf, size_t *outbytesleft)
     616             : {
     617        2965 :         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
     618        2814 :                 (*outbuf)[0] = (*inbuf)[0];
     619        2814 :                 (*outbuf)[1] = 0;
     620        2814 :                 (*inbytesleft)  -= 1;
     621        2814 :                 (*outbytesleft) -= 2;
     622        2814 :                 (*inbuf)  += 1;
     623        2814 :                 (*outbuf) += 2;
     624             :         }
     625             : 
     626         151 :         if (*inbytesleft > 0) {
     627           2 :                 errno = E2BIG;
     628           2 :                 return -1;
     629             :         }
     630             : 
     631         140 :         return 0;
     632             : }
     633             : 
     634             : /*
     635             :   this takes a UTF16 sequence and produces a latin1/ISO-8859-1 sequence
     636             : 
     637             :   The first 256 codepoints of latin1 matches the first 256 codepoints
     638             :   of unicode, and so can be read directly from the first byte of UTF16LE
     639             : 
     640             :  */
     641         211 : static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
     642             :                          char **outbuf, size_t *outbytesleft)
     643             : {
     644         211 :         int ir_count=0;
     645             : 
     646        2293 :         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
     647        2082 :                 (*outbuf)[0] = (*inbuf)[0];
     648        2082 :                 if ((*inbuf)[1] != 0) {
     649             :                         /* If this is multi-byte, then it isn't legal latin1 */
     650           0 :                         errno = EILSEQ;
     651           0 :                         return -1;
     652             :                 }
     653        2082 :                 (*inbytesleft)  -= 2;
     654        2082 :                 (*outbytesleft) -= 1;
     655        2082 :                 (*inbuf)  += 2;
     656        2082 :                 (*outbuf) += 1;
     657             :         }
     658             : 
     659         211 :         if (*inbytesleft == 1) {
     660           0 :                 errno = EINVAL;
     661           0 :                 return -1;
     662             :         }
     663             : 
     664         211 :         if (*inbytesleft > 1) {
     665           1 :                 errno = E2BIG;
     666           1 :                 return -1;
     667             :         }
     668             : 
     669         204 :         return ir_count;
     670             : }
     671             : 
     672           0 : static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
     673             :                          char **outbuf, size_t *outbytesleft)
     674             : {
     675           0 :         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
     676           0 :                 uint8_t hi = 0, lo = 0;
     677           0 :                 bool ok;
     678             : 
     679           0 :                 if ((*inbuf)[0] != '@') {
     680             :                         /* seven bit ascii case */
     681           0 :                         (*outbuf)[0] = (*inbuf)[0];
     682           0 :                         (*outbuf)[1] = 0;
     683           0 :                         (*inbytesleft)  -= 1;
     684           0 :                         (*outbytesleft) -= 2;
     685           0 :                         (*inbuf)  += 1;
     686           0 :                         (*outbuf) += 2;
     687           0 :                         continue;
     688             :                 }
     689             :                 /* it's a hex character */
     690           0 :                 if (*inbytesleft < 5) {
     691           0 :                         errno = EINVAL;
     692           0 :                         return -1;
     693             :                 }
     694             : 
     695           0 :                 ok = hex_byte(&(*inbuf)[1], &hi) && hex_byte(&(*inbuf)[3], &lo);
     696           0 :                 if (!ok) {
     697           0 :                         errno = EILSEQ;
     698           0 :                         return -1;
     699             :                 }
     700             : 
     701           0 :                 (*outbuf)[0] = lo;
     702           0 :                 (*outbuf)[1] = hi;
     703           0 :                 (*inbytesleft)  -= 5;
     704           0 :                 (*outbytesleft) -= 2;
     705           0 :                 (*inbuf)  += 5;
     706           0 :                 (*outbuf) += 2;
     707             :         }
     708             : 
     709           0 :         if (*inbytesleft > 0) {
     710           0 :                 errno = E2BIG;
     711           0 :                 return -1;
     712             :         }
     713             : 
     714           0 :         return 0;
     715             : }
     716             : 
     717           0 : static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
     718             :                            char **outbuf, size_t *outbytesleft)
     719             : {
     720           0 :         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
     721           0 :                 char buf[6];
     722             : 
     723           0 :                 if ((*inbuf)[1] == 0 &&
     724           0 :                     ((*inbuf)[0] & 0x80) == 0 &&
     725           0 :                     (*inbuf)[0] != '@') {
     726           0 :                         (*outbuf)[0] = (*inbuf)[0];
     727           0 :                         (*inbytesleft)  -= 2;
     728           0 :                         (*outbytesleft) -= 1;
     729           0 :                         (*inbuf)  += 2;
     730           0 :                         (*outbuf) += 1;
     731           0 :                         continue;
     732             :                 }
     733           0 :                 if (*outbytesleft < 5) {
     734           0 :                         errno = E2BIG;
     735           0 :                         return -1;
     736             :                 }
     737           0 :                 snprintf(buf, 6, "@%04x", SVAL(*inbuf, 0));
     738           0 :                 memcpy(*outbuf, buf, 5);
     739           0 :                 (*inbytesleft)  -= 2;
     740           0 :                 (*outbytesleft) -= 5;
     741           0 :                 (*inbuf)  += 2;
     742           0 :                 (*outbuf) += 5;
     743             :         }
     744             : 
     745           0 :         if (*inbytesleft == 1) {
     746           0 :                 errno = EINVAL;
     747           0 :                 return -1;
     748             :         }
     749             : 
     750           0 :         if (*inbytesleft > 1) {
     751           0 :                 errno = E2BIG;
     752           0 :                 return -1;
     753             :         }
     754             : 
     755           0 :         return 0;
     756             : }
     757             : 
     758       72670 : static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
     759             :                          char **outbuf, size_t *outbytesleft)
     760             : {
     761        8282 :         int n;
     762             : 
     763       72670 :         n = MIN(*inbytesleft, *outbytesleft);
     764             : 
     765       72670 :         swab(*inbuf, *outbuf, (n&~1));
     766       72670 :         if (n&1) {
     767           0 :                 (*outbuf)[n-1] = 0;
     768             :         }
     769             : 
     770       72670 :         (*inbytesleft) -= n;
     771       72670 :         (*outbytesleft) -= n;
     772       72670 :         (*inbuf) += n;
     773       72670 :         (*outbuf) += n;
     774             : 
     775       72670 :         if (*inbytesleft > 0) {
     776         182 :                 errno = E2BIG;
     777         182 :                 return -1;
     778             :         }
     779             : 
     780       64206 :         return 0;
     781             : }
     782             : 
     783             : 
     784        4981 : static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
     785             :                          char **outbuf, size_t *outbytesleft)
     786             : {
     787          31 :         int n;
     788             : 
     789        4981 :         n = MIN(*inbytesleft, *outbytesleft);
     790             : 
     791        4981 :         memmove(*outbuf, *inbuf, n);
     792             : 
     793        4981 :         (*inbytesleft) -= n;
     794        4981 :         (*outbytesleft) -= n;
     795        4981 :         (*inbuf) += n;
     796        4981 :         (*outbuf) += n;
     797             : 
     798        4981 :         if (*inbytesleft > 0) {
     799           0 :                 errno = E2BIG;
     800           0 :                 return -1;
     801             :         }
     802             : 
     803        4950 :         return 0;
     804             : }
     805             : 
     806             : /*
     807             :   this takes a UTF8 sequence and produces a UTF16 sequence
     808             :  */
     809   126986428 : static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
     810             :                          char **outbuf, size_t *outbytesleft)
     811             : {
     812   126986428 :         size_t in_left=*inbytesleft, out_left=*outbytesleft;
     813   126986428 :         const uint8_t *c = (const uint8_t *)*inbuf;
     814   126986428 :         uint8_t *uc = (uint8_t *)*outbuf;
     815             : 
     816  2413335537 :         while (in_left >= 1 && out_left >= 2) {
     817  2287405386 :                 if ((c[0] & 0x80) == 0) {
     818  2270128735 :                         uc[0] = c[0];
     819  2270128735 :                         uc[1] = 0;
     820  2270128735 :                         c  += 1;
     821  2270128735 :                         in_left  -= 1;
     822  2270128735 :                         out_left -= 2;
     823  2270128735 :                         uc += 2;
     824  2270128735 :                         continue;
     825             :                 }
     826             : 
     827    17276651 :                 if ((c[0] & 0xe0) == 0xc0) {
     828     7162239 :                         if (in_left < 2 ||
     829     7162237 :                             (c[1] & 0xc0) != 0x80) {
     830         242 :                                 errno = EILSEQ;
     831         242 :                                 goto error;
     832             :                         }
     833     7161997 :                         uc[1] = (c[0]>>2) & 0x7;
     834     7161997 :                         uc[0] = (c[0]<<6) | (c[1]&0x3f);
     835     7161997 :                         if (uc[1] == 0 && uc[0] < 0x80) {
     836             :                                 /* this should have been a single byte */
     837           1 :                                 errno = EILSEQ;
     838           1 :                                 goto error;
     839             :                         }
     840     7161996 :                         c  += 2;
     841     7161996 :                         in_left  -= 2;
     842     7161996 :                         out_left -= 2;
     843     7161996 :                         uc += 2;
     844     7161996 :                         continue;
     845             :                 }
     846             : 
     847    10114412 :                 if ((c[0] & 0xf0) == 0xe0) {
     848     3897195 :                         unsigned int codepoint;
     849     6976095 :                         if (in_left < 3 ||
     850     6976091 :                             (c[1] & 0xc0) != 0x80 ||
     851     6975729 :                             (c[2] & 0xc0) != 0x80) {
     852         366 :                                 errno = EILSEQ;
     853         366 :                                 goto error;
     854             :                         }
     855     6975729 :                         codepoint = ((c[2] & 0x3f)        |
     856     6975729 :                                      ((c[1] & 0x3f) << 6) |
     857     6975729 :                                      ((c[0] & 0x0f) << 12));
     858             : 
     859     6975729 :                         if (codepoint < 0x800) {
     860             :                                 /* this should be a 1 or 2 byte sequence */
     861           1 :                                 errno = EILSEQ;
     862           1 :                                 goto error;
     863             :                         }
     864     6975728 :                         if (codepoint >= 0xd800 && codepoint <= 0xdfff) {
     865             :                                 /*
     866             :                                  * This is an invalid codepoint, per
     867             :                                  * RFC3629, as it encodes part of a
     868             :                                  * UTF-16 surrogate pair for a
     869             :                                  * character over U+10000, which ought
     870             :                                  * to have been encoded as a four byte
     871             :                                  * utf-8 sequence.
     872             :                                  *
     873             :                                  * Prior to Vista, Windows might
     874             :                                  * sometimes produce invalid strings
     875             :                                  * where a utf-16 sequence containing
     876             :                                  * surrogate pairs was converted
     877             :                                  * "verbatim" into utf-8, instead of
     878             :                                  * encoding the actual codepoint. This
     879             :                                  * format is sometimes called "WTF-8".
     880             :                                  *
     881             :                                  * If we were to support that, we'd
     882             :                                  * have a branch here for the case
     883             :                                  * where the codepoint is between
     884             :                                  * 0xd800 and 0xdbff (a "high
     885             :                                  * surrogate"), and read a *six*
     886             :                                  * character sequence from there which
     887             :                                  * would include a low surrogate. But
     888             :                                  * that would undermine the
     889             :                                  * hard-learnt principle that each
     890             :                                  * character should only have one
     891             :                                  * encoding.
     892             :                                  */
     893           0 :                                 errno = EILSEQ;
     894           0 :                                 goto error;
     895             :                         }
     896             : 
     897     6975728 :                         uc[0] = codepoint & 0xff;
     898     6975728 :                         uc[1] = codepoint >> 8;
     899     6975728 :                         c  += 3;
     900     6975728 :                         in_left  -= 3;
     901     6975728 :                         out_left -= 2;
     902     6975728 :                         uc += 2;
     903     6975728 :                         continue;
     904             :                 }
     905             : 
     906     3138317 :                 if ((c[0] & 0xf8) == 0xf0) {
     907     2956531 :                         unsigned int codepoint;
     908     3137475 :                         if (in_left < 4 ||
     909     3137475 :                             (c[1] & 0xc0) != 0x80 ||
     910     3137475 :                             (c[2] & 0xc0) != 0x80 ||
     911     3137475 :                             (c[3] & 0xc0) != 0x80) {
     912           0 :                                 errno = EILSEQ;
     913           0 :                                 goto error;
     914             :                         }
     915     3137475 :                         codepoint =
     916     3137475 :                                 (c[3]&0x3f) |
     917     3137475 :                                 ((c[2]&0x3f)<<6) |
     918     3137475 :                                 ((c[1]&0x3f)<<12) |
     919     3137475 :                                 ((c[0]&0x7)<<18);
     920     3137475 :                         if (codepoint < 0x10000) {
     921             :                                 /* reject UTF-8 characters that are not
     922             :                                    minimally packed */
     923           1 :                                 errno = EILSEQ;
     924           1 :                                 goto error;
     925             :                         }
     926     3137474 :                         if (codepoint > 0x10ffff) {
     927             :                                 /*
     928             :                                  * Unicode stops at 0x10ffff, and if
     929             :                                  * we ignore that, we'll end up
     930             :                                  * encoding the wrong characters in
     931             :                                  * the surrogate pair.
     932             :                                  */
     933           0 :                                 errno = EILSEQ;
     934           0 :                                 goto error;
     935             :                         }
     936             : 
     937     3137474 :                         codepoint -= 0x10000;
     938             : 
     939     3137474 :                         if (out_left < 4) {
     940     1054824 :                                 errno = E2BIG;
     941     1054824 :                                 goto error;
     942             :                         }
     943             : 
     944     2082650 :                         uc[0] = (codepoint>>10) & 0xFF;
     945     2082650 :                         uc[1] = (codepoint>>18) | 0xd8;
     946     2082650 :                         uc[2] = codepoint & 0xFF;
     947     2082650 :                         uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
     948     2082650 :                         c  += 4;
     949     2082650 :                         in_left  -= 4;
     950     2082650 :                         out_left -= 4;
     951     2082650 :                         uc += 4;
     952     2082650 :                         continue;
     953             :                 }
     954             : 
     955             :                 /* we don't handle 5 byte sequences */
     956         842 :                 errno = EINVAL;
     957         842 :                 goto error;
     958             :         }
     959             : 
     960   125930151 :         if (in_left > 0) {
     961     2046583 :                 errno = E2BIG;
     962     2046583 :                 goto error;
     963             :         }
     964             : 
     965   123883568 :         *inbytesleft = in_left;
     966   123883568 :         *outbytesleft = out_left;
     967   123883568 :         *inbuf = (const char *)c;
     968   123883568 :         *outbuf = (char *)uc;
     969   123883568 :         return 0;
     970             : 
     971     3102860 : error:
     972     3102860 :         *inbytesleft = in_left;
     973     3102860 :         *outbytesleft = out_left;
     974     3102860 :         *inbuf = (const char *)c;
     975     3102860 :         *outbuf = (char *)uc;
     976     3102860 :         return -1;
     977             : }
     978             : 
     979             : 
     980             : /*
     981             :   this takes a UTF16 sequence and produces a UTF8 sequence
     982             :  */
     983   131839861 : static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
     984             :                         char **outbuf, size_t *outbytesleft)
     985             : {
     986   131839861 :         size_t in_left=*inbytesleft, out_left=*outbytesleft;
     987   131839861 :         uint8_t *c = (uint8_t *)*outbuf;
     988   131839861 :         const uint8_t *uc = (const uint8_t *)*inbuf;
     989             : 
     990  3392303555 :         while (in_left >= 2 && out_left >= 1) {
     991    73024055 :                 unsigned int codepoint;
     992             : 
     993  3260864357 :                 if (uc[1] == 0 && !(uc[0] & 0x80)) {
     994             :                         /* simplest case */
     995  3247961842 :                         c[0] = uc[0];
     996  3247961842 :                         in_left  -= 2;
     997  3247961842 :                         out_left -= 1;
     998  3247961842 :                         uc += 2;
     999  3247961842 :                         c  += 1;
    1000  3247961842 :                         continue;
    1001             :                 }
    1002             : 
    1003    12902515 :                 if ((uc[1]&0xf8) == 0) {
    1004             :                         /* next simplest case */
    1005     4596239 :                         if (out_left < 2) {
    1006           4 :                                 errno = E2BIG;
    1007           4 :                                 goto error;
    1008             :                         }
    1009     4596235 :                         c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
    1010     4596235 :                         c[1] = 0x80 | (uc[0] & 0x3f);
    1011     4596235 :                         in_left  -= 2;
    1012     4596235 :                         out_left -= 2;
    1013     4596235 :                         uc += 2;
    1014     4596235 :                         c  += 2;
    1015     4596235 :                         continue;
    1016             :                 }
    1017             : 
    1018     8306276 :                 if ((uc[1] & 0xfc) == 0xdc) {
    1019      305339 :                         errno = EILSEQ;
    1020             : #ifndef HAVE_ICONV_ERRNO_ILLEGAL_MULTIBYTE
    1021             :                         if (in_left < 4) {
    1022             :                                 errno = EINVAL;
    1023             :                         }
    1024             : #endif
    1025      305339 :                         goto error;
    1026             :                 }
    1027             : 
    1028     8000937 :                 if ((uc[1] & 0xfc) != 0xd8) {
    1029     5914752 :                         codepoint = uc[0] | (uc[1]<<8);
    1030     5914752 :                         if (out_left < 3) {
    1031           3 :                                 errno = E2BIG;
    1032           3 :                                 goto error;
    1033             :                         }
    1034     5914749 :                         c[0] = 0xe0 | (codepoint >> 12);
    1035     5914749 :                         c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
    1036     5914749 :                         c[2] = 0x80 | (codepoint & 0x3f);
    1037             : 
    1038     5914749 :                         in_left  -= 2;
    1039     5914749 :                         out_left -= 3;
    1040     5914749 :                         uc  += 2;
    1041     5914749 :                         c   += 3;
    1042     5914749 :                         continue;
    1043             :                 }
    1044             : 
    1045             :                 /* its the first part of a 4 byte sequence */
    1046     2086185 :                 if (in_left < 4) {
    1047        3557 :                         errno = EINVAL;
    1048        3557 :                         goto error;
    1049             :                 }
    1050     2082628 :                 if ((uc[3] & 0xfc) != 0xdc) {
    1051       91760 :                         errno = EILSEQ;
    1052       91760 :                         goto error;
    1053             :                 }
    1054     1990868 :                 codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
    1055     1990868 :                                        (uc[0]<<10) | ((uc[1] & 0x3)<<18));
    1056             : 
    1057     1990868 :                 if (out_left < 4) {
    1058           0 :                         errno = E2BIG;
    1059           0 :                         goto error;
    1060             :                 }
    1061     1990868 :                 c[0] = 0xf0 | (codepoint >> 18);
    1062     1990868 :                 c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
    1063     1990868 :                 c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
    1064     1990868 :                 c[3] = 0x80 | (codepoint & 0x3f);
    1065             : 
    1066     1990868 :                 in_left  -= 4;
    1067     1990868 :                 out_left -= 4;
    1068     1990868 :                 uc       += 4;
    1069     1990868 :                 c        += 4;
    1070             :         }
    1071             : 
    1072   131439198 :         if (in_left == 1) {
    1073       50702 :                 errno = EINVAL;
    1074       50702 :                 goto error;
    1075             :         }
    1076             : 
    1077   131388496 :         if (in_left > 1) {
    1078          15 :                 errno = E2BIG;
    1079          15 :                 goto error;
    1080             :         }
    1081             : 
    1082   131388481 :         *inbytesleft = in_left;
    1083   131388481 :         *outbytesleft = out_left;
    1084   131388481 :         *inbuf  = (const char *)uc;
    1085   131388481 :         *outbuf = (char *)c;
    1086             : 
    1087   131388481 :         return 0;
    1088             : 
    1089      451380 : error:
    1090      451380 :         *inbytesleft = in_left;
    1091      451380 :         *outbytesleft = out_left;
    1092      451380 :         *inbuf  = (const char *)uc;
    1093      451380 :         *outbuf = (char *)c;
    1094      451380 :         return -1;
    1095             : }
    1096             : 
    1097             : 
    1098             : /*
    1099             :   this takes a UTF16 munged sequence, modifies it according to the
    1100             :   string2key rules, and produces a UTF16 sequence
    1101             : 
    1102             : The rules are:
    1103             : 
    1104             :     1) any 0x0000 characters are mapped to 0x0001
    1105             : 
    1106             :     2) convert any instance of 0xD800 - 0xDBFF (high surrogate)
    1107             :        without an immediately following 0xDC00 - 0xDFFF (low surrogate) to
    1108             :        U+FFFD (REPLACEMENT CHARACTER).
    1109             : 
    1110             :     3) the same for any low surrogate that was not preceded by a high surrogate.
    1111             : 
    1112             :  */
    1113       24309 : static size_t utf16_munged_pull(void *cd, const char **inbuf, size_t *inbytesleft,
    1114             :                                char **outbuf, size_t *outbytesleft)
    1115             : {
    1116       24309 :         size_t in_left=*inbytesleft, out_left=*outbytesleft;
    1117       24309 :         uint8_t *c = (uint8_t *)*outbuf;
    1118       24309 :         const uint8_t *uc = (const uint8_t *)*inbuf;
    1119             : 
    1120      734269 :         while (in_left >= 2 && out_left >= 2) {
    1121      709960 :                 unsigned int codepoint = uc[0] | (uc[1]<<8);
    1122             : 
    1123      709960 :                 if (codepoint == 0) {
    1124           2 :                         codepoint = 1;
    1125             :                 }
    1126             : 
    1127      709960 :                 if ((codepoint & 0xfc00) == 0xd800) {
    1128             :                         /* a high surrogate */
    1129         112 :                         unsigned int codepoint2;
    1130        1348 :                         if (in_left < 4) {
    1131           4 :                                 codepoint = 0xfffd;
    1132           4 :                                 goto codepoint16;
    1133             :                         }
    1134        1344 :                         codepoint2 = uc[2] | (uc[3]<<8);
    1135        1344 :                         if ((codepoint2 & 0xfc00) != 0xdc00) {
    1136             :                                 /* high surrogate not followed by low
    1137             :                                    surrogate: convert to 0xfffd */
    1138        1326 :                                 codepoint = 0xfffd;
    1139        1326 :                                 goto codepoint16;
    1140             :                         }
    1141          18 :                         if (out_left < 4) {
    1142           0 :                                 errno = E2BIG;
    1143           0 :                                 goto error;
    1144             :                         }
    1145          18 :                         memcpy(c, uc, 4);
    1146          18 :                         in_left  -= 4;
    1147          18 :                         out_left -= 4;
    1148          18 :                         uc       += 4;
    1149          18 :                         c        += 4;
    1150          18 :                         continue;
    1151             :                 }
    1152             : 
    1153      708612 :                 if ((codepoint & 0xfc00) == 0xdc00) {
    1154             :                         /* low surrogate not preceded by high
    1155             :                            surrogate: convert to 0xfffd */
    1156        1353 :                         codepoint = 0xfffd;
    1157             :                 }
    1158             : 
    1159      707259 :         codepoint16:
    1160      709942 :                 c[0] = codepoint & 0xFF;
    1161      709942 :                 c[1] = (codepoint>>8) & 0xFF;
    1162             : 
    1163      709942 :                 in_left  -= 2;
    1164      709942 :                 out_left -= 2;
    1165      709942 :                 uc  += 2;
    1166      709942 :                 c   += 2;
    1167      709942 :                 continue;
    1168             :         }
    1169             : 
    1170       24309 :         if (in_left == 1) {
    1171           0 :                 errno = EINVAL;
    1172           0 :                 goto error;
    1173             :         }
    1174             : 
    1175       24309 :         if (in_left > 1) {
    1176           0 :                 errno = E2BIG;
    1177           0 :                 goto error;
    1178             :         }
    1179             : 
    1180       24309 :         *inbytesleft = in_left;
    1181       24309 :         *outbytesleft = out_left;
    1182       24309 :         *inbuf  = (const char *)uc;
    1183       24309 :         *outbuf = (char *)c;
    1184             : 
    1185       24309 :         return 0;
    1186             : 
    1187           0 : error:
    1188           0 :         *inbytesleft = in_left;
    1189           0 :         *outbytesleft = out_left;
    1190           0 :         *inbuf  = (const char *)uc;
    1191           0 :         *outbuf = (char *)c;
    1192           0 :         return -1;
    1193             : }
    1194             : 
    1195             : 
    1196             :

Generated by: LCOV version 1.14