Line data Source code
1 : /*
2 : Samba Unix SMB/CIFS implementation.
3 :
4 : Python bindings for compression functions.
5 :
6 : Copyright (C) Petr Viktorin 2015
7 : Copyright (C) Douglas Bagnall 2022
8 :
9 : ** NOTE! The following LGPL license applies to the talloc
10 : ** library. This does NOT imply that all of Samba is released
11 : ** under the LGPL
12 :
13 : This library is free software; you can redistribute it and/or
14 : modify it under the terms of the GNU Lesser General Public
15 : License as published by the Free Software Foundation; either
16 : version 3 of the License, or (at your option) any later version.
17 :
18 : This library is distributed in the hope that it will be useful,
19 : but WITHOUT ANY WARRANTY; without even the implied warranty of
20 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 : Lesser General Public License for more details.
22 :
23 : You should have received a copy of the GNU Lesser General Public
24 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
25 : */
26 :
27 : #include "includes.h"
28 : #include <talloc.h>
29 : #include "lib/replace/system/python.h"
30 : #include "lzxpress.h"
31 : #include "lzxpress_huffman.h"
32 :
33 : /* CompressionError is filled out in module init */
34 : static PyObject *CompressionError = NULL;
35 :
36 9 : static PyObject *plain_compress(PyObject *mod, PyObject *args)
37 : {
38 9 : uint8_t *src = NULL;
39 9 : Py_ssize_t src_len;
40 9 : char *dest = NULL;
41 9 : Py_ssize_t dest_len;
42 9 : PyObject *dest_obj = NULL;
43 9 : size_t alloc_len;
44 9 : int ret;
45 :
46 9 : if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) {
47 : return NULL;
48 : }
49 :
50 : /*
51 : * 9/8 + 4 is the worst case growth, but we add room.
52 : *
53 : * alloc_len can't overflow as src_len is ssize_t while alloc_len is
54 : * size_t.
55 : */
56 9 : alloc_len = src_len + src_len / 8 + 500;
57 :
58 9 : dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
59 9 : if (dest_obj == NULL) {
60 : return NULL;
61 : }
62 9 : dest = PyBytes_AS_STRING(dest_obj);
63 :
64 9 : dest_len = lzxpress_compress(src,
65 : src_len,
66 : (uint8_t *)dest,
67 : alloc_len);
68 9 : if (dest_len < 0) {
69 0 : PyErr_SetString(CompressionError, "unable to compress data");
70 0 : Py_DECREF(dest_obj);
71 0 : return NULL;
72 : }
73 :
74 9 : ret = _PyBytes_Resize(&dest_obj, dest_len);
75 9 : if (ret != 0) {
76 : /*
77 : * Don't try to free dest_obj, as we're in deep MemoryError
78 : * territory here.
79 : */
80 : return NULL;
81 : }
82 9 : return dest_obj;
83 : }
84 :
85 :
86 11 : static PyObject *plain_decompress(PyObject *mod, PyObject *args)
87 : {
88 11 : uint8_t *src = NULL;
89 11 : Py_ssize_t src_len;
90 11 : char *dest = NULL;
91 11 : Py_ssize_t dest_len;
92 11 : PyObject *dest_obj = NULL;
93 11 : Py_ssize_t alloc_len = 0;
94 11 : Py_ssize_t given_len = 0;
95 11 : int ret;
96 :
97 11 : if (!PyArg_ParseTuple(args, "s#|n", &src, &src_len, &given_len)) {
98 : return NULL;
99 : }
100 11 : if (given_len != 0) {
101 : /*
102 : * With plain decompression, we don't *need* the exact output
103 : * size (as we do with LZ77+Huffman), but it certainly helps
104 : * when guessing the size.
105 : */
106 : alloc_len = given_len;
107 2 : } else if (src_len > UINT32_MAX) {
108 : /*
109 : * The underlying decompress function will reject this, but by
110 : * checking here we can give a better message and be clearer
111 : * about overflow risks.
112 : *
113 : * Note, the limit is actually the smallest of UINT32_MAX and
114 : * SSIZE_MAX, but src_len is ssize_t so it already can't
115 : * exceed that.
116 : */
117 0 : PyErr_Format(CompressionError,
118 : "The maximum size for compressed data is 4GB "
119 : "cannot decompress %zu bytes.", src_len);
120 : } else {
121 : /*
122 : * The data can expand massively (though not beyond the
123 : * 4GB limit) so we guess a big number for small inputs
124 : * (we expect small inputs), and a relatively conservative
125 : * number for big inputs.
126 : */
127 2 : if (src_len <= 3333333) {
128 : alloc_len = 10000000;
129 0 : } else if (src_len > UINT32_MAX / 3) {
130 : alloc_len = UINT32_MAX;
131 : } else {
132 0 : alloc_len = src_len * 3;
133 : }
134 : }
135 :
136 11 : dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
137 11 : if (dest_obj == NULL) {
138 : return NULL;
139 : }
140 11 : dest = PyBytes_AS_STRING(dest_obj);
141 :
142 11 : dest_len = lzxpress_decompress(src,
143 : src_len,
144 : (uint8_t *)dest,
145 : alloc_len);
146 11 : if (dest_len < 0) {
147 2 : if (alloc_len == given_len) {
148 1 : PyErr_Format(CompressionError,
149 : "unable to decompress data into a buffer "
150 : "of %zd bytes.", alloc_len);
151 : } else {
152 1 : PyErr_Format(CompressionError,
153 : "unable to decompress data into a buffer "
154 : "of %zd bytes. If you know the length, "
155 : "supply it as the second argument.",
156 : alloc_len);
157 : }
158 2 : Py_DECREF(dest_obj);
159 2 : return NULL;
160 : }
161 :
162 9 : ret = _PyBytes_Resize(&dest_obj, dest_len);
163 9 : if (ret != 0) {
164 : /*
165 : * Don't try to free dest_obj, as we're in deep MemoryError
166 : * territory here.
167 : */
168 : return NULL;
169 : }
170 9 : return dest_obj;
171 : }
172 :
173 :
174 :
175 7 : static PyObject *huffman_compress(PyObject *mod, PyObject *args)
176 : {
177 7 : uint8_t *src = NULL;
178 7 : Py_ssize_t src_len;
179 7 : char *dest = NULL;
180 7 : Py_ssize_t dest_len;
181 7 : PyObject *dest_obj = NULL;
182 7 : size_t alloc_len;
183 7 : int ret;
184 7 : struct lzxhuff_compressor_mem cmp_mem;
185 :
186 7 : if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) {
187 : return NULL;
188 : }
189 : /*
190 : * worst case is roughly 256 per 64k or less.
191 : *
192 : * alloc_len won't overflow as src_len is ssize_t while alloc_len is
193 : * size_t.
194 : */
195 7 : alloc_len = src_len + src_len / 8 + 500;
196 :
197 7 : dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
198 7 : if (dest_obj == NULL) {
199 : return NULL;
200 : }
201 7 : dest = PyBytes_AS_STRING(dest_obj);
202 :
203 7 : dest_len = lzxpress_huffman_compress(&cmp_mem,
204 : src,
205 : src_len,
206 : (uint8_t *)dest,
207 : alloc_len);
208 7 : if (dest_len < 0) {
209 1 : PyErr_SetString(CompressionError, "unable to compress data");
210 1 : Py_DECREF(dest_obj);
211 1 : return NULL;
212 : }
213 :
214 6 : ret = _PyBytes_Resize(&dest_obj, dest_len);
215 6 : if (ret != 0) {
216 : return NULL;
217 : }
218 6 : return dest_obj;
219 : }
220 :
221 :
222 8 : static PyObject *huffman_decompress(PyObject *mod, PyObject *args)
223 : {
224 8 : uint8_t *src = NULL;
225 8 : Py_ssize_t src_len;
226 8 : char *dest = NULL;
227 8 : Py_ssize_t dest_len;
228 8 : PyObject *dest_obj = NULL;
229 8 : Py_ssize_t given_len = 0;
230 : /*
231 : * Here it is always necessary to supply the exact length.
232 : */
233 :
234 8 : if (!PyArg_ParseTuple(args, "s#n", &src, &src_len, &given_len)) {
235 : return NULL;
236 : }
237 :
238 7 : dest_obj = PyBytes_FromStringAndSize(NULL, given_len);
239 7 : if (dest_obj == NULL) {
240 : return NULL;
241 : }
242 7 : dest = PyBytes_AS_STRING(dest_obj);
243 :
244 7 : dest_len = lzxpress_huffman_decompress(src,
245 : src_len,
246 : (uint8_t *)dest,
247 : given_len);
248 7 : if (dest_len != given_len) {
249 3 : PyErr_Format(CompressionError,
250 : "unable to decompress data into a %zd bytes.",
251 : given_len);
252 3 : Py_DECREF(dest_obj);
253 3 : return NULL;
254 : }
255 : /* no resize here */
256 : return dest_obj;
257 : }
258 :
259 :
260 : static PyMethodDef mod_methods[] = {
261 : { "plain_compress", (PyCFunction)plain_compress, METH_VARARGS,
262 : "compress bytes using lzxpress plain compression"},
263 : { "plain_decompress", (PyCFunction)plain_decompress, METH_VARARGS,
264 : "decompress lzxpress plain compressed bytes"},
265 : { "huffman_compress", (PyCFunction)huffman_compress, METH_VARARGS,
266 : "compress bytes using lzxpress plain compression"},
267 : { "huffman_decompress", (PyCFunction)huffman_decompress, METH_VARARGS,
268 : "decompress lzxpress plain compressed bytes"},
269 : {0}
270 : };
271 :
272 :
273 : #define MODULE_DOC PyDoc_STR("LZXpress compression/decompression bindings")
274 :
275 : static struct PyModuleDef moduledef = {
276 : PyModuleDef_HEAD_INIT,
277 : .m_name = "compression",
278 : .m_doc = MODULE_DOC,
279 : .m_size = -1,
280 : .m_methods = mod_methods,
281 : };
282 :
283 :
284 1 : static PyObject *module_init(void)
285 : {
286 1 : PyObject *m = PyModule_Create(&moduledef);
287 1 : if (m == NULL) {
288 : return NULL;
289 : }
290 :
291 1 : CompressionError = PyErr_NewException(
292 : "compression.CompressionError",
293 : PyExc_Exception,
294 : NULL);
295 1 : PyModule_AddObject(m, "CompressionError", CompressionError);
296 :
297 1 : return m;
298 : }
299 :
300 : PyMODINIT_FUNC PyInit_compression(void);
301 1 : PyMODINIT_FUNC PyInit_compression(void)
302 : {
303 1 : return module_init();
304 : }
|