UncompressingDownload.php 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. <?php
  2. /**
  3. * An observer that saves response body to stream, possibly uncompressing it
  4. *
  5. * PHP version 5
  6. *
  7. * LICENSE
  8. *
  9. * This source file is subject to BSD 3-Clause License that is bundled
  10. * with this package in the file LICENSE and available at the URL
  11. * https://raw.github.com/pear/HTTP_Request2/trunk/docs/LICENSE
  12. *
  13. * @category HTTP
  14. * @package HTTP_Request2
  15. * @author Delian Krustev <krustev@krustev.net>
  16. * @author Alexey Borzov <avb@php.net>
  17. * @copyright 2008-2016 Alexey Borzov <avb@php.net>
  18. * @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause License
  19. * @link http://pear.php.net/package/HTTP_Request2
  20. */
  21. require_once 'HTTP/Request2/Response.php';
  22. /**
  23. * An observer that saves response body to stream, possibly uncompressing it
  24. *
  25. * This Observer is written as a complement to PEAR's HTTP_Request2 in order to
  26. * avoid reading the whole response body in memory. Instead it writes the body
  27. * to a stream. If the body is transferred with content-encoding set to
  28. * "deflate" or "gzip" it is decoded on the fly.
  29. *
  30. * The constructor accepts an already opened (for write) stream (file_descriptor).
  31. * If the response is deflate/gzip encoded a "zlib.inflate" filter is applied
  32. * to the stream. When the body has been read from the request and written to
  33. * the stream ("receivedBody" event) the filter is removed from the stream.
  34. *
  35. * The "zlib.inflate" filter works fine with pure "deflate" encoding. It does
  36. * not understand the "deflate+zlib" and "gzip" headers though, so they have to
  37. * be removed prior to being passed to the stream. This is done in the "update"
  38. * method.
  39. *
  40. * It is also possible to limit the number of decompressed bytes written by
  41. * passing $maxDownloadSize to the constructor. This is important because
  42. * e.g. 1GB of zeroes takes about a MB when compressed.
  43. *
  44. * Exceptions are being thrown if data could not be written to the stream or
  45. * the written bytes have already exceeded the requested maximum. If the "gzip"
  46. * header is malformed or could not be parsed an exception will be thrown too.
  47. *
  48. * Example usage follows:
  49. *
  50. * <code>
  51. * require_once 'HTTP/Request2.php';
  52. * require_once 'HTTP/Request2/Observer/UncompressingDownload.php';
  53. *
  54. * #$inPath = 'http://carsten.codimi.de/gzip.yaws/daniels.html';
  55. * #$inPath = 'http://carsten.codimi.de/gzip.yaws/daniels.html?deflate=on';
  56. * $inPath = 'http://carsten.codimi.de/gzip.yaws/daniels.html?deflate=on&zlib=on';
  57. * #$outPath = "/dev/null";
  58. * $outPath = "delme";
  59. *
  60. * $stream = fopen($outPath, 'wb');
  61. * if (!$stream) {
  62. * throw new Exception('fopen failed');
  63. * }
  64. *
  65. * $request = new HTTP_Request2(
  66. * $inPath,
  67. * HTTP_Request2::METHOD_GET,
  68. * array(
  69. * 'store_body' => false,
  70. * 'connect_timeout' => 5,
  71. * 'timeout' => 10,
  72. * 'ssl_verify_peer' => true,
  73. * 'ssl_verify_host' => true,
  74. * 'ssl_cafile' => null,
  75. * 'ssl_capath' => '/etc/ssl/certs',
  76. * 'max_redirects' => 10,
  77. * 'follow_redirects' => true,
  78. * 'strict_redirects' => false
  79. * )
  80. * );
  81. *
  82. * $observer = new HTTP_Request2_Observer_UncompressingDownload($stream, 9999999);
  83. * $request->attach($observer);
  84. *
  85. * $response = $request->send();
  86. *
  87. * fclose($stream);
  88. * echo "OK\n";
  89. * </code>
  90. *
  91. * @category HTTP
  92. * @package HTTP_Request2
  93. * @author Delian Krustev <krustev@krustev.net>
  94. * @author Alexey Borzov <avb@php.net>
  95. * @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause License
  96. * @version Release: 2.3.0
  97. * @link http://pear.php.net/package/HTTP_Request2
  98. */
  class HTTP_Request2_Observer_UncompressingDownload implements SplObserver
  {
      /**
       * The stream to write response body to
       * @var resource
       */
      private $_stream;

      /**
       * zlib.inflate filter possibly added to stream
       * @var resource|null
       */
      private $_streamFilter;

      /**
       * Lowercased value of response's Content-Encoding header
       * (empty string when the header is absent)
       * @var string
       */
      private $_encoding;

      /**
       * Whether the observer is still waiting for gzip/deflate header
       * @var bool
       */
      private $_processingHeader = true;

      /**
       * Starting position in the stream observer writes to
       * @var int
       */
      private $_startPosition = 0;

      /**
       * Maximum bytes to write
       * @var int|null
       */
      private $_maxDownloadSize;

      /**
       * Whether response being received is a redirect
       * @var bool
       */
      private $_redirect = false;

      /**
       * Accumulated body chunks that may contain (gzip) header
       * @var string
       */
      private $_possibleHeader = '';

      /**
       * Class constructor
       *
       * Note that there might be problems with $maxDownloadSize and files
       * bigger than 2 GB on 32bit platforms
       *
       * @param resource $stream          a stream (or file descriptor) opened for writing.
       * @param int      $maxDownloadSize maximum bytes to write; a falsy value
       *                                  (null, 0) disables the limit
       */
      public function __construct($stream, $maxDownloadSize = null)
      {
          $this->_stream = $stream;
          if ($maxDownloadSize) {
              $this->_maxDownloadSize = $maxDownloadSize;
              // The limit is measured relative to where the stream pointer
              // was when the observer was created.
              $this->_startPosition = ftell($this->_stream);
          }
      }

      /**
       * Called when the request notifies us of an event.
       *
       * Handles 'receivedHeaders', 'receivedEncodedBodyPart',
       * 'receivedBodyPart' and 'receivedBody' events, all other events
       * are ignored.
       *
       * @param SplSubject $request The HTTP_Request2 instance
       *
       * @return void
       * @throws HTTP_Request2_MessageException
       */
      public function update(SplSubject $request)
      {
          /* @var $request HTTP_Request2 */
          $event = $request->getLastEvent();

          /* @var $event['data'] HTTP_Request2_Response */
          switch ($event['name']) {
          case 'receivedHeaders':
              // A new response is starting: reset per-response state.
              $this->_processingHeader = true;
              $this->_redirect         = $event['data']->isRedirect();
              // getHeader() may yield null when the header is missing;
              // passing null to strtolower() is deprecated since PHP 8.1,
              // hence the explicit cast.
              $this->_encoding         = strtolower(
                  (string)$event['data']->getHeader('content-encoding')
              );
              $this->_possibleHeader   = '';
              break;

          case 'receivedEncodedBodyPart':
              if (!$this->_streamFilter
                  && ('deflate' === $this->_encoding || 'gzip' === $this->_encoding)
              ) {
                  $this->_streamFilter = stream_filter_append(
                      $this->_stream, 'zlib.inflate', STREAM_FILTER_WRITE
                  );
              }
              $this->_writeBodyPart($event['data'], true);
              break;

          case 'receivedBodyPart':
              $this->_writeBodyPart($event['data'], false);
              break;

          case 'receivedBody':
              if ($this->_streamFilter) {
                  stream_filter_remove($this->_streamFilter);
                  $this->_streamFilter = null;
              }
              break;
          }
      }

      /**
       * Writes a chunk of response body to the stream, enforcing the size limit
       *
       * For encoded chunks the data is buffered until the leading
       * gzip / zlib header can be skipped, only then the remainder is
       * written to the stream.
       *
       * @param string $data    body chunk received from the request
       * @param bool   $encoded whether the chunk is still content-encoded
       *
       * @return void
       * @throws HTTP_Request2_MessageException if writing fails, the size limit
       *                  is exceeded or gzip header could not be parsed
       */
      private function _writeBodyPart($data, $encoded)
      {
          // Bodies of redirect responses are not written to the stream.
          if ($this->_redirect) {
              return;
          }

          if (!$encoded || !$this->_processingHeader) {
              $payload = $data;
          } else {
              $this->_possibleHeader .= $data;
              $offset = $this->_getHeaderLength();
              if (null === $offset) {
                  // Header is still incomplete, wait for the next chunk.
                  return;
              }
              $this->_processingHeader = false;
              $payload = substr($this->_possibleHeader, $offset);
              // The buffer is no longer needed once flushed, release it
              // rather than keeping it around for the whole download.
              $this->_possibleHeader = '';
          }

          if (false === fwrite($this->_stream, $payload)) {
              throw new HTTP_Request2_MessageException('fwrite failed.');
          }

          if ($this->_maxDownloadSize
              && ftell($this->_stream) - $this->_startPosition > $this->_maxDownloadSize
          ) {
              throw new HTTP_Request2_MessageException(sprintf(
                  'Body length limit (%d bytes) reached',
                  $this->_maxDownloadSize
              ));
          }
      }

      /**
       * Returns the length of the encoding header at the start of buffered data
       *
       * @return int|null number of leading bytes to skip before passing data to
       *                  the stream, null if more data is needed to decide
       * @throws HTTP_Request2_MessageException if gzip header is reported
       *                  as invalid by HTTP_Request2_Response::parseGzipHeader()
       */
      private function _getHeaderLength()
      {
          if ('deflate' === $this->_encoding) {
              if (2 > strlen($this->_possibleHeader)) {
                  return null;
              }
              // If the first two bytes, read as a big-endian 16-bit number,
              // are a multiple of 31, they are treated as a zlib header and
              // skipped; otherwise the data is passed through from the start.
              $header = unpack('n', substr($this->_possibleHeader, 0, 2));
              return (0 === $header[1] % 31) ? 2 : 0;
          }

          if ('gzip' === $this->_encoding) {
              if (10 > strlen($this->_possibleHeader)) {
                  return null;
              }
              try {
                  return HTTP_Request2_Response::parseGzipHeader(
                      $this->_possibleHeader, false
                  );
              } catch (HTTP_Request2_MessageException $e) {
                  // need more data?
                  if (false !== strpos($e->getMessage(), 'data too short')) {
                      return null;
                  }
                  throw $e;
              }
          }

          // Any other encoding: nothing to strip.
          return 0;
      }
  }