Line data Source code
1 : /*
2 : * Functions for RFC 3986 percent-encoding.
3 : *
4 : * NOTE:
5 : *
6 : * This file was originally imported from the Squid project but has been
7 : * significantly altered. The licence below is reproduced intact, but refers
8 : * to files in Squid's repository, not in Samba. See COPYING for the GPLv3
9 : * notice (being the later version mentioned below).
10 : */
11 :
12 : /*
13 : * $Id$
14 : *
15 : * DEBUG:
16 : * AUTHOR: Harvest Derived
17 : *
18 : * SQUID Web Proxy Cache http://www.squid-cache.org/
19 : * ----------------------------------------------------------
20 : *
21 : * Squid is the result of efforts by numerous individuals from
22 : * the Internet community; see the CONTRIBUTORS file for full
23 : * details. Many organizations have provided support for Squid's
24 : * development; see the SPONSORS file for full details. Squid is
25 : * Copyrighted (C) 2001 by the Regents of the University of
26 : * California; see the COPYRIGHT file for full details. Squid
27 : * incorporates software developed and/or copyrighted by other
28 : * sources; see the CREDITS file for full details.
29 : *
30 : * This program is free software; you can redistribute it and/or modify
31 : * it under the terms of the GNU General Public License as published by
32 : * the Free Software Foundation; either version 2 of the License, or
33 : * (at your option) any later version.
34 : *
35 : * This program is distributed in the hope that it will be useful,
36 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
37 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38 : * GNU General Public License for more details.
39 : *
40 : * You should have received a copy of the GNU General Public License
41 : * along with this program; if not, write to the Free Software
42 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
43 : *
44 : */
45 :
46 : #include "replace.h"
47 : #include <talloc.h>
48 : #include "lib/util/samba_util.h"
49 :
#define RFC1738_ENCODE 1
#define RFC1738_RESERVED 2

/*
 * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
 * that has been obsolete since 2004, we aim instead for RFC 3986, where:
 *
 *  reserved    = : / ? # [ ] @ ! $ & ' ( ) * + , ; =
 *  unreserved  = ALPHA DIGIT - . _ ~
 *
 * and whatever is not in either of those is what RFC 1738 called "unsafe",
 * meaning that they are canonically but not mandatorily escaped.
 *
 * Characters below 0x20 or above 0x7E are always encoded; they are handled
 * by an explicit range check in rfc1738_do_escape() and so need no entry
 * here. The table is indexed by byte value and covers 0..126 only.
 */

static const unsigned char escapees[127] = {
	[' '] = RFC1738_ENCODE,
	['"'] = RFC1738_ENCODE,
	['%'] = RFC1738_ENCODE,
	['<'] = RFC1738_ENCODE,
	['>'] = RFC1738_ENCODE,
	['\\'] = RFC1738_ENCODE,
	['^'] = RFC1738_ENCODE,
	['`'] = RFC1738_ENCODE,
	['{'] = RFC1738_ENCODE,
	['|'] = RFC1738_ENCODE,
	['}'] = RFC1738_ENCODE,
	/* reserved : / ? # [ ] @ ! $ & ' ( ) * + , ; = */
	[':'] = RFC1738_RESERVED,
	['/'] = RFC1738_RESERVED,
	['?'] = RFC1738_RESERVED,
	['#'] = RFC1738_RESERVED,
	['['] = RFC1738_RESERVED,
	[']'] = RFC1738_RESERVED,
	['@'] = RFC1738_RESERVED,
	['!'] = RFC1738_RESERVED,
	['$'] = RFC1738_RESERVED,
	['&'] = RFC1738_RESERVED,
	['\''] = RFC1738_RESERVED,
	['('] = RFC1738_RESERVED,
	[')'] = RFC1738_RESERVED,
	['*'] = RFC1738_RESERVED,
	['+'] = RFC1738_RESERVED,
	[','] = RFC1738_RESERVED,
	[';'] = RFC1738_RESERVED,
	['='] = RFC1738_RESERVED,
};

/*
 * rfc1738_do_escape - fills a preallocated buffer with an escaped version of
 * the given string.
 *
 * buf      destination buffer
 * bufsize  size of buf in bytes, including room for the terminating '\0'
 * url      bytes to escape (only the first len bytes are read)
 * len      number of bytes to read from url
 * mask     which classes from the escapees table to escape
 *
 * For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED.
 * For mandatory escaping, mask should be RFC1738_RESERVED.
 *
 * Bytes below 0x20 or above 0x7E are escaped regardless of mask. Escapes are
 * written as '%' followed by two upper-case hex digits.
 *
 * Returns buf on success, with the result '\0'-terminated. Returns NULL if
 * the escaped string plus terminator does not fit in bufsize bytes (which
 * includes bufsize == 0); buf is then in an undefined state.
 */
static char *
rfc1738_do_escape(char *buf, size_t bufsize,
		  const char *url, size_t len, unsigned char mask)
{
	static const char hex_digits[] = "0123456789ABCDEF";
	size_t i;
	size_t j = 0;

	/* Even an empty result needs one byte for the terminator. */
	if (bufsize == 0) {
		return NULL;
	}

	/*
	 * Invariant: j < bufsize at the top of every iteration, so
	 * (bufsize - j) never underflows and buf[j] is always available
	 * for the final '\0'.
	 */
	for (i = 0; i < len; i++) {
		unsigned int c = (unsigned char) url[i];
		if (c > 126 || c < 32 || (escapees[c] & mask)) {
			/* need 3 bytes for "%XX" plus 1 for the terminator */
			if (bufsize - j <= 3) {
				return NULL;
			}
			buf[j++] = '%';
			buf[j++] = hex_digits[c >> 4];
			buf[j++] = hex_digits[c & 0x0F];
		} else {
			/* need 1 byte for the character plus 1 for the terminator */
			if (bufsize - j <= 1) {
				return NULL;
			}
			buf[j++] = (char) c;
		}
	}
	buf[j] = '\0';
	return buf;
}
131 :
132 : /*
133 : * rfc1738_escape_part - Returns a talloced buffer that contains the RFC 3986
134 : * compliant, escaped version of the given url segment.
135 : */
136 : char *
137 52 : rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
138 : {
139 52 : size_t bufsize = 0;
140 52 : char *buf = NULL;
141 :
142 52 : size_t len = strlen(url);
143 52 : if (len >= SIZE_MAX / 3) {
144 0 : return NULL;
145 : }
146 :
147 52 : bufsize = len * 3 + 1;
148 52 : buf = talloc_array(mem_ctx, char, bufsize);
149 52 : if (buf == NULL) {
150 0 : return NULL;
151 : }
152 :
153 52 : talloc_set_name_const(buf, buf);
154 :
155 52 : return rfc1738_do_escape(buf, bufsize, url, len,
156 : RFC1738_ENCODE | RFC1738_RESERVED);
157 : }
158 :
159 : /*
160 : * rfc1738_unescape() - Converts url-escaped characters in the string.
161 : *
162 : * The two characters following a '%' in a string should be hex digits that
163 : * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII;
164 : * this is the only way to include a % in the unescaped string. Any character
165 : * can be escaped, including plain letters (e.g. "%61" for "a"). Anything
166 : * other than 2 hex characters following the % is an error.
167 : *
168 : * The conversion is done in-place, which is always safe as unescapes can only
169 : * shorten the string.
170 : *
171 : * Returns a pointer to the end of the string (that is, the '\0' byte), or
172 : * NULL on error, at which point s is in an undefined state.
173 : *
174 : * Note that after `char *e = rfc_unescape(s)`, `strlen(s)` will not equal
175 : * `e - s` if s originally contained "%00". You might want to check for this.
176 : */
177 :
178 235 : _PUBLIC_ char *rfc1738_unescape(char *s)
179 : {
180 23 : size_t i, j; /* i is write, j is read */
181 10572 : for (i = 0, j = 0; s[j] != '\0'; i++, j++) {
182 10346 : if (s[j] == '%') {
183 271 : uint8_t v;
184 271 : bool ok;
185 :
186 271 : ok = hex_byte(&s[j+1], &v);
187 271 : if (!ok) {
188 9 : return NULL;
189 : }
190 262 : j += 2; /* OK; hex_byte() has checked ahead */
191 262 : s[i] = (unsigned char)v;
192 : } else {
193 10075 : s[i] = s[j];
194 : }
195 : }
196 226 : s[i] = '\0';
197 226 : return s + i;
198 : }
|