pylzma_decompressobj.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /*
  2. * Python Bindings for LZMA
  3. *
  4. * Copyright (c) 2004-2006 by Joachim Bauch, mail@joachim-bauch.de
  5. * 7-Zip Copyright (C) 1999-2005 Igor Pavlov
  6. * LZMA SDK Copyright (C) 1999-2005 Igor Pavlov
  7. *
  8. * This library is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * This library is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with this library; if not, write to the Free Software
  20. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  21. *
  22. * $Id: pylzma_decompressobj.c 116 2006-09-28 21:46:20Z jojo $
  23. *
  24. */
  25. #include <Python.h>
  26. #include <7zip/LzmaStateDecode.h>
  27. #include "pylzma.h"
  28. #include "pylzma_decompress.h"
  29. #include "pylzma_decompressobj.h"
  30. int pylzma_decomp_init(CDecompressionObject *self, PyObject *args, PyObject *kwargs)
  31. {
  32. int max_length = -1;
  33. // possible keywords for this function
  34. static char *kwlist[] = {"maxlength", NULL};
  35. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &max_length))
  36. return -1;
  37. if (max_length == 0 || max_length < -1) {
  38. PyErr_SetString(PyExc_ValueError, "the decompressed size must be greater than zero");
  39. return -1;
  40. }
  41. self->unconsumed_tail = NULL;
  42. self->unconsumed_length = 0;
  43. self->need_properties = 1;
  44. self->max_length = max_length;
  45. self->total_out = 0;
  46. memset(&self->state, 0, sizeof(self->state));
  47. return 0;
  48. }
  49. static const char doc_decomp_decompress[] = \
  50. "decompress(data[, bufsize]) -- Returns a string containing the up to bufsize decompressed bytes of the data.\n" \
  51. "After calling, some of the input data may be available in internal buffers for later processing.";
  52. static PyObject *pylzma_decomp_decompress(CDecompressionObject *self, PyObject *args)
  53. {
  54. PyObject *result=NULL;
  55. unsigned char *data, *next_in, *next_out;
  56. int length, start_total_out, res, max_length=BLOCK_SIZE;
  57. SizeT avail_in, avail_out;
  58. unsigned char properties[LZMA_PROPERTIES_SIZE];
  59. SizeT inProcessed, outProcessed;
  60. if (!PyArg_ParseTuple(args, "s#|l", &data, &length, &max_length))
  61. return NULL;
  62. if (max_length <= 0)
  63. {
  64. PyErr_SetString(PyExc_ValueError, "bufsize must be greater than zero");
  65. return NULL;
  66. }
  67. start_total_out = self->total_out;
  68. if (self->unconsumed_length > 0) {
  69. self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, self->unconsumed_length + length);
  70. next_in = (unsigned char *)self->unconsumed_tail;
  71. memcpy(next_in + self->unconsumed_length, data, length);
  72. } else
  73. next_in = data;
  74. avail_in = self->unconsumed_length + length;
  75. if (self->need_properties && avail_in < sizeof(properties)) {
  76. // we need enough bytes to read the properties
  77. if (!self->unconsumed_length) {
  78. self->unconsumed_tail = (unsigned char *)malloc(length);
  79. memcpy(self->unconsumed_tail, data, length);
  80. }
  81. self->unconsumed_length += length;
  82. return PyString_FromString("");
  83. }
  84. if (self->need_properties) {
  85. self->need_properties = 0;
  86. memcpy(&properties, next_in, sizeof(properties));
  87. avail_in -= sizeof(properties);
  88. next_in += sizeof(properties);
  89. if (self->unconsumed_length >= sizeof(properties)-length) {
  90. self->unconsumed_length -= sizeof(properties)-length;
  91. if (self->unconsumed_length > 0) {
  92. memcpy(self->unconsumed_tail, self->unconsumed_tail+sizeof(properties), self->unconsumed_length);
  93. self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, self->unconsumed_length);
  94. } else
  95. FREE_AND_NULL(self->unconsumed_tail);
  96. }
  97. if (LzmaDecodeProperties(&self->state.Properties, properties, LZMA_PROPERTIES_SIZE) != LZMA_RESULT_OK)
  98. {
  99. PyErr_SetString(PyExc_TypeError, "Incorrect stream properties");
  100. goto exit;
  101. }
  102. self->state.Probs = (CProb *)malloc(LzmaGetNumProbs(&self->state.Properties) * sizeof(CProb));
  103. if (self->state.Probs == 0) {
  104. PyErr_NoMemory();
  105. goto exit;
  106. }
  107. if (self->state.Properties.DictionarySize == 0)
  108. self->state.Dictionary = 0;
  109. else {
  110. self->state.Dictionary = (unsigned char *)malloc(self->state.Properties.DictionarySize);
  111. if (self->state.Dictionary == 0) {
  112. free(self->state.Probs);
  113. self->state.Probs = NULL;
  114. PyErr_NoMemory();
  115. goto exit;
  116. }
  117. }
  118. LzmaDecoderInit(&self->state);
  119. }
  120. if (avail_in == 0)
  121. // no more bytes to decompress
  122. return PyString_FromString("");
  123. if (!(result = PyString_FromStringAndSize(NULL, max_length)))
  124. return NULL;
  125. next_out = (unsigned char *)PyString_AS_STRING(result);
  126. avail_out = max_length;
  127. Py_BEGIN_ALLOW_THREADS
  128. // Decompress until EOS marker is reached
  129. res = LzmaDecode(&self->state, next_in, avail_in, &inProcessed,
  130. next_out, avail_out, &outProcessed, 0);
  131. Py_END_ALLOW_THREADS
  132. self->total_out += outProcessed;
  133. next_in += inProcessed;
  134. avail_in -= inProcessed;
  135. next_out += outProcessed;
  136. avail_out -= outProcessed;
  137. if (res != LZMA_RESULT_OK) {
  138. PyErr_SetString(PyExc_ValueError, "data error during decompression");
  139. DEC_AND_NULL(result);
  140. goto exit;
  141. }
  142. /* Not all of the compressed data could be accomodated in the output buffer
  143. of specified size. Return the unconsumed tail in an attribute.*/
  144. if (avail_in > 0)
  145. {
  146. if (avail_in != self->unconsumed_length) {
  147. if (avail_in > self->unconsumed_length) {
  148. self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, avail_in);
  149. memcpy(self->unconsumed_tail, next_in, avail_in);
  150. }
  151. if (avail_in < self->unconsumed_length) {
  152. memcpy(self->unconsumed_tail, next_in, avail_in);
  153. self->unconsumed_tail = (unsigned char *)realloc(self->unconsumed_tail, avail_in);
  154. }
  155. }
  156. if (!self->unconsumed_tail) {
  157. PyErr_NoMemory();
  158. DEC_AND_NULL(result);
  159. goto exit;
  160. }
  161. } else
  162. FREE_AND_NULL(self->unconsumed_tail);
  163. self->unconsumed_length = avail_in;
  164. _PyString_Resize(&result, self->total_out - start_total_out);
  165. exit:
  166. return result;
  167. }
  168. static const char doc_decomp_flush[] = \
  169. "flush() -- Return remaining data.";
  170. static PyObject *pylzma_decomp_flush(CDecompressionObject *self, PyObject *args)
  171. {
  172. PyObject *result=NULL;
  173. int res;
  174. SizeT avail_out, outsize;
  175. unsigned char *tmp;
  176. SizeT inProcessed, outProcessed;
  177. if (!PyArg_ParseTuple(args, ""))
  178. return NULL;
  179. if (self->max_length != -1)
  180. avail_out = self->max_length - self->total_out;
  181. else
  182. avail_out = BLOCK_SIZE;
  183. if (avail_out == 0)
  184. // no more remaining data
  185. return PyString_FromString("");
  186. result = PyString_FromStringAndSize(NULL, avail_out);
  187. if (result == NULL)
  188. return NULL;
  189. tmp = (unsigned char *)PyString_AS_STRING(result);
  190. outsize = 0;
  191. while (1) {
  192. Py_BEGIN_ALLOW_THREADS
  193. if (self->unconsumed_length == 0)
  194. // No remaining data
  195. res = LzmaDecode(&self->state, (unsigned char *)"", 0, &inProcessed,
  196. tmp, avail_out, &outProcessed, 1);
  197. else {
  198. // Decompress remaining data
  199. res = LzmaDecode(&self->state, self->unconsumed_tail, self->unconsumed_length, &inProcessed,
  200. tmp, avail_out, &outProcessed, 1);
  201. self->unconsumed_length -= inProcessed;
  202. if (self->unconsumed_length > 0)
  203. memcpy(self->unconsumed_tail, self->unconsumed_tail + inProcessed, self->unconsumed_length);
  204. else
  205. FREE_AND_NULL(self->unconsumed_tail);
  206. }
  207. Py_END_ALLOW_THREADS
  208. if (res != LZMA_RESULT_OK) {
  209. PyErr_SetString(PyExc_ValueError, "data error during decompression");
  210. DEC_AND_NULL(result);
  211. goto exit;
  212. }
  213. self->total_out += outProcessed;
  214. outsize += outProcessed;
  215. if (outProcessed < avail_out || (outProcessed == avail_out && self->max_length != -1))
  216. break;
  217. if (self->max_length != -1) {
  218. PyErr_SetString(PyExc_ValueError, "not enough input data for decompression");
  219. DEC_AND_NULL(result);
  220. goto exit;
  221. }
  222. avail_out -= outProcessed;
  223. // Output buffer is full, might be more data for decompression
  224. if (_PyString_Resize(&result, outsize+BLOCK_SIZE) != 0)
  225. goto exit;
  226. avail_out += BLOCK_SIZE;
  227. tmp = (unsigned char *)PyString_AS_STRING(result) + outsize;
  228. }
  229. if (outsize != PyString_GET_SIZE(result))
  230. _PyString_Resize(&result, outsize);
  231. exit:
  232. return result;
  233. }
  234. static const char doc_decomp_reset[] = \
  235. "reset([maxlength]) -- Resets the decompression object.";
  236. static PyObject *pylzma_decomp_reset(CDecompressionObject *self, PyObject *args, PyObject *kwargs)
  237. {
  238. PyObject *result=NULL;
  239. int max_length = -1;
  240. // possible keywords for this function
  241. static char *kwlist[] = {"maxlength", NULL};
  242. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &max_length))
  243. return NULL;
  244. free_lzma_state(&self->state);
  245. memset(&self->state, 0, sizeof(self->state));
  246. FREE_AND_NULL(self->unconsumed_tail);
  247. self->unconsumed_length = 0;
  248. self->need_properties = 1;
  249. self->total_out = 0;
  250. self->max_length = max_length;
  251. result = Py_None;
  252. Py_XINCREF(result);
  253. return result;
  254. }
  255. static PyMethodDef pylzma_decomp_methods[] = {
  256. {"decompress", (PyCFunction)pylzma_decomp_decompress, METH_VARARGS, (char *)&doc_decomp_decompress},
  257. {"flush", (PyCFunction)pylzma_decomp_flush, METH_VARARGS, (char *)&doc_decomp_flush},
  258. {"reset", (PyCFunction)pylzma_decomp_reset, METH_VARARGS | METH_KEYWORDS, (char *)&doc_decomp_reset},
  259. {NULL, NULL},
  260. };
  261. static void pylzma_decomp_dealloc(CDecompressionObject *self)
  262. {
  263. free_lzma_state(&self->state);
  264. FREE_AND_NULL(self->unconsumed_tail);
  265. self->ob_type->tp_free((PyObject*)self);
  266. }
  267. PyTypeObject CDecompressionObject_Type = {
  268. //PyObject_HEAD_INIT(&PyType_Type)
  269. PyObject_HEAD_INIT(NULL)
  270. 0,
  271. "LZMADecompress", /* char *tp_name; */
  272. sizeof(CDecompressionObject), /* int tp_basicsize; */
  273. 0, /* int tp_itemsize; // not used much */
  274. (destructor)pylzma_decomp_dealloc, /* destructor tp_dealloc; */
  275. NULL, /* printfunc tp_print; */
  276. NULL, /* getattrfunc tp_getattr; // __getattr__ */
  277. NULL, /* setattrfunc tp_setattr; // __setattr__ */
  278. NULL, /* cmpfunc tp_compare; // __cmp__ */
  279. NULL, /* reprfunc tp_repr; // __repr__ */
  280. NULL, /* PyNumberMethods *tp_as_number; */
  281. NULL, /* PySequenceMethods *tp_as_sequence; */
  282. NULL, /* PyMappingMethods *tp_as_mapping; */
  283. NULL, /* hashfunc tp_hash; // __hash__ */
  284. NULL, /* ternaryfunc tp_call; // __call__ */
  285. NULL, /* reprfunc tp_str; // __str__ */
  286. 0, /* tp_getattro*/
  287. 0, /* tp_setattro*/
  288. 0, /* tp_as_buffer*/
  289. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
  290. "Decompression class", /* tp_doc */
  291. 0, /* tp_traverse */
  292. 0, /* tp_clear */
  293. 0, /* tp_richcompare */
  294. 0, /* tp_weaklistoffset */
  295. 0, /* tp_iter */
  296. 0, /* tp_iternext */
  297. pylzma_decomp_methods, /* tp_methods */
  298. 0, /* tp_members */
  299. 0, /* tp_getset */
  300. 0, /* tp_base */
  301. 0, /* tp_dict */
  302. 0, /* tp_descr_get */
  303. 0, /* tp_descr_set */
  304. 0, /* tp_dictoffset */
  305. (initproc)pylzma_decomp_init, /* tp_init */
  306. 0, /* tp_alloc */
  307. 0, /* tp_new */
  308. };