StoreRemoteMedia.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. <?php
  2. // {{{ License
  3. // This file is part of GNU social - https://www.gnu.org/software/social
  4. //
  5. // GNU social is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU Affero General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // GNU social is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU Affero General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  17. // }}}
  18. namespace Plugin\StoreRemoteMedia;
  19. use App\Core\Modules\Plugin;
  20. /**
  21. * The StoreRemoteMedia plugin downloads remotely attached files to local server.
  22. *
  23. * @package GNUsocial
  24. *
  25. * @author Mikael Nordfeldth
  26. * @author Stephen Paul Weber
  27. * @author Mikael Nordfeldth
  28. * @author Miguel Dantas
  29. * @author Diogo Peralta Cordeiro
  30. * @copyright 2015-2016, 2019-2021 Free Software Foundation, Inc http://www.fsf.org
  31. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  32. */
  33. class StoreRemoteMedia extends Plugin
  34. {
  35. const PLUGIN_VERSION = '3.0.0';
  36. // settings which can be set in config.php with addPlugin('StoreRemoteMedia', array('param'=>'value', ...));
  37. // WARNING, these are _regexps_ (slashes added later). Always escape your dots and end your strings
  38. public $domain_whitelist = [
  39. // hostname => service provider
  40. '^i\d*\.ytimg\.com$' => 'YouTube',
  41. '^i\d*\.vimeocdn\.com$' => 'Vimeo',
  42. ];
  43. public $append_whitelist = []; // fill this array as domain_whitelist to add more trusted sources
  44. public $check_whitelist = false; // security/abuse precaution
  45. public $store_original = false; // Whether to maintain a copy of the original media or only a thumbnail of it
  46. public $thumbnail_width;
  47. public $thumbnail_height;
  48. public $crop;
  49. public $max_size;
  50. /**
  51. * Initialize the StoreRemoteMedia plugin and set up the environment it needs for it.
  52. * Returns true if it initialized properly, the exception object if it
  53. * doesn't.
  54. */
  55. public function initialize()
  56. {
  57. parent::initialize();
  58. $this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist);
  59. // Load global configuration if specific not provided
  60. $this->thumbnail_width = $this->thumbnail_width ?? common_config('thumbnail', 'width');
  61. $this->thumbnail_height = $this->thumbnail_height ?? common_config('thumbnail', 'height');
  62. $this->max_size = $this->max_size ?? common_config('attachments', 'file_quota');
  63. $this->crop = $this->crop ?? common_config('thumbnail', 'crop');
  64. }
  65. /**
  66. * This event executes when GNU social is creating a file thumbnail entry in
  67. * the database. We glom onto this to fetch remote attachments.
  68. *
  69. * @param $file File the file of the created thumbnail
  70. * @param &$imgPath null|string = out the path to the created thumbnail (output parameter)
  71. * @param $media string = media type (unused)
  72. *
  73. * @throws AlreadyFulfilledException
  74. * @throws FileNotFoundException
  75. * @throws FileNotStoredLocallyException
  76. * @throws HTTP_Request2_Exception
  77. * @throws ServerException
  78. *
  79. * @return bool
  80. */
  81. public function onCreateFileImageThumbnailSource(File $file, ?string &$imgPath = null, ?string $media = null): bool
  82. {
  83. // If we are on a private node, we won't do any remote calls (just as a precaution until
  84. // we can configure this from config.php for the private nodes)
  85. if (common_config('site', 'private')) {
  86. return true;
  87. }
  88. // If there is a local filename, it is either a local file already or has already been downloaded.
  89. if (!$file->isStoredRemotely()) {
  90. common_debug(sprintf('File id==%d isn\'t a non-fetched remote file (%s), so nothing StoreRemoteMedia ' .
  91. 'should handle.', $file->getID(), _ve($file->filename)));
  92. return true;
  93. }
  94. try {
  95. File_thumbnail::byFile($file);
  96. // If we don't get the exception `No result found on File_thumbnail lookup.` then Embed has already handled it most likely.
  97. return true;
  98. } catch (NoResultException $e) {
  99. // We can move on
  100. }
  101. $url = $file->getUrl(false);
  102. if (substr($url, 0, 7) == 'file://') {
  103. $filename = substr($url, 7);
  104. $info = getimagesize($filename);
  105. $filename = basename($filename);
  106. $width = $info[0];
  107. $height = $info[1];
  108. } else {
  109. $this->checkWhitelist($url);
  110. $head = (new HTTPClient())->head($url);
  111. $headers = $head->getHeader();
  112. $headers = array_change_key_case($headers, CASE_LOWER);
  113. try {
  114. $is_image = $this->isRemoteImage($url, $headers);
  115. if ($is_image == true) {
  116. $file_size = $this->getRemoteFileSize($url, $headers);
  117. if (($file_size != false) && ($file_size > $this->max_size)) {
  118. common_debug('Went to store remote thumbnail of size ' . $file_size .
  119. ' but the upload limit is ' . $this->max_size . ' so we aborted.');
  120. return false;
  121. }
  122. } else {
  123. return false;
  124. }
  125. } catch (Exception $err) {
  126. common_debug('Could not determine size of remote image, aborted local storage.');
  127. throw $err;
  128. }
  129. // First we download the file to memory and test whether it's actually an image file
  130. // FIXME: To support remote video/whatever files, this needs reworking.
  131. common_debug(sprintf(
  132. 'Downloading remote image for file id==%u with URL: %s',
  133. $file->getID(),
  134. $url
  135. ));
  136. try {
  137. $imgData = HTTPClient::quickGet($url);
  138. if (isset($imgData)) {
  139. list($filename, $filehash, $width, $height) = $this->validateAndWriteImage(
  140. $imgData,
  141. $url,
  142. $headers,
  143. $file->getID()
  144. );
  145. } else {
  146. throw new UnsupportedMediaException('HTTPClient returned an empty result');
  147. }
  148. } catch (UnsupportedMediaException $e) {
  149. // Couldn't find anything that looks like an image, nothing to do
  150. common_debug("StoreRemoteMedia was not able to find an image for URL `{$url}`: " . $e->getMessage());
  151. return false;
  152. }
  153. }
  154. $ft = null;
  155. if ($this->store_original) {
  156. try {
  157. // Update our database for the file record
  158. $orig = clone $file;
  159. $file->filename = $filename;
  160. $file->filehash = $filehash;
  161. $file->width = $width;
  162. $file->height = $height;
  163. // Throws exception on failure.
  164. $file->updateWithKeys($orig);
  165. } catch (Exception $err) {
  166. common_log(LOG_ERR, 'Went to update a file entry on the database in ' .
  167. 'StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: ' . $err);
  168. throw $err;
  169. }
  170. } else {
  171. try {
  172. // Insert a thumbnail record for this file
  173. $data = new stdClass();
  174. $data->thumbnail_url = $url;
  175. $data->thumbnail_width = $width;
  176. $data->thumbnail_height = $height;
  177. File_thumbnail::saveNew($data, $file->getID());
  178. $ft = File_thumbnail::byFile($file);
  179. $orig = clone $ft;
  180. $ft->filename = $filename;
  181. $ft->updateWithKeys($orig);
  182. } catch (Exception $err) {
  183. common_log(LOG_ERR, 'Went to write a thumbnail entry to the database in ' .
  184. 'StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: ' . $err);
  185. throw $err;
  186. }
  187. }
  188. // Out
  189. try {
  190. $imgPath = $file->getFileOrThumbnailPath($ft);
  191. return !file_exists($imgPath);
  192. } catch (Exception $e) {
  193. return true;
  194. }
  195. }
  196. /**
  197. * Check the file size of a remote file using a HEAD request and checking
  198. * the content-length variable returned. This isn't 100% foolproof but is
  199. * reliable enough for our purposes.
  200. *
  201. * @param mixed $url
  202. * @param null|mixed $headers
  203. *
  204. * @return bool|string the file size if it succeeds, false otherwise.
  205. */
  206. private function getRemoteFileSize($url, $headers = null)
  207. {
  208. try {
  209. if ($headers === null) {
  210. if (!common_valid_http_url($url)) {
  211. common_log(LOG_ERR, 'Invalid URL in StoreRemoteMedia::getRemoteFileSize()');
  212. return false;
  213. }
  214. $head = (new HTTPClient())->head($url);
  215. $headers = $head->getHeader();
  216. $headers = array_change_key_case($headers, CASE_LOWER);
  217. }
  218. return $headers['content-length'] ?? false;
  219. } catch (Exception $err) {
  220. common_log(LOG_ERR, __CLASS__ . ': getRemoteFileSize on URL : ' . _ve($url) .
  221. ' threw exception: ' . $err->getMessage());
  222. return false;
  223. }
  224. }
  225. /**
  226. * A private helper function that uses a CURL lookup to check the mime type
  227. * of a remote URL to see it it's an image.
  228. *
  229. * @param mixed $url
  230. * @param null|mixed $headers
  231. *
  232. * @return bool true if the remote URL is an image, or false otherwise.
  233. */
  234. private function isRemoteImage($url, $headers = null): bool
  235. {
  236. if (empty($headers)) {
  237. if (!common_valid_http_url($url)) {
  238. common_log(LOG_ERR, 'Invalid URL in StoreRemoteMedia::isRemoteImage()');
  239. return false;
  240. }
  241. $head = (new HTTPClient())->head($url);
  242. $headers = $head->getHeader();
  243. $headers = array_change_key_case($headers, CASE_LOWER);
  244. }
  245. return !empty($headers['content-type']) && common_get_mime_media($headers['content-type']) === 'image';
  246. }
  247. /**
  248. * Validate that $imgData is a valid image before writing it to
  249. * disk, as well as resizing it to at most $this->thumbnail_width
  250. * by $this->thumbnail_height
  251. *
  252. * @param $imgData - The image data to validate. Taken by reference to avoid copying
  253. * @param null|string $url - The url where the image came from, to fetch metadata
  254. * @param null|array $headers - The headers possible previous request to $url
  255. * @param null|int $file_id - The id of the file this image belongs to, used for logging
  256. */
  257. protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null): array
  258. {
  259. $info = @getimagesizefromstring($imgData);
  260. // array indexes documented on php.net:
  261. // https://php.net/manual/en/function.getimagesize.php
  262. if ($info === false) {
  263. throw new UnsupportedMediaException(_m('Remote file format was not identified as an image.'), $url);
  264. } elseif (!$info[0] || !$info[1]) {
  265. throw new UnsupportedMediaException(_m('Image file had impossible geometry (0 width or height)'));
  266. }
  267. $width = min($info[0], $this->thumbnail_width);
  268. $height = min($info[1], $this->thumbnail_height);
  269. $filehash = hash(File::FILEHASH_ALG, $imgData);
  270. try {
  271. if (!empty($url)) {
  272. $original_name = HTTPClient::get_filename($url, $headers);
  273. }
  274. $filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash);
  275. } catch (Exception $err) {
  276. common_log(LOG_ERR, 'Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail ' .
  277. "but encountered error: {$err}");
  278. throw $err;
  279. }
  280. try {
  281. $fullpath = $this->store_original ? File::path($filename) : File_thumbnail::path($filename);
  282. // Write the file to disk. Throw Exception on failure
  283. if (!file_exists($fullpath)) {
  284. if (strpos($fullpath, INSTALLDIR) !== 0 || file_put_contents($fullpath, $imgData) === false) {
  285. throw new ServerException(_m('Could not write downloaded file to disk.'));
  286. }
  287. if (common_get_mime_media(MediaFile::getUploadedMimeType($fullpath)) !== 'image') {
  288. @unlink($fullpath);
  289. throw new UnsupportedMediaException(
  290. _m('Remote file format was not identified as an image.'),
  291. $url
  292. );
  293. }
  294. // If the image is not of the desired size, resize it
  295. if (!$this->store_original && $this->crop && ($info[0] > $this->thumbnail_width || $info[1] > $this->thumbnail_height)) {
  296. try {
  297. // Temporary object, not stored in DB
  298. $img = new ImageFile(-1, $fullpath);
  299. list($width, $height, $x, $y, $w, $h) = $img->scaleToFit($this->thumbnail_width, $this->thumbnail_height, $this->crop);
  300. // The boundary box for our resizing
  301. $box = [
  302. 'width' => $width, 'height' => $height,
  303. 'x' => $x, 'y' => $y,
  304. 'w' => $w, 'h' => $h,
  305. ];
  306. $width = $box['width'];
  307. $height = $box['height'];
  308. $img->resizeTo($fullpath, $box);
  309. } catch (\Intervention\Image\Exception\NotReadableException $e) {
  310. common_log(LOG_ERR, "StoreRemoteMediaPlugin::storeRemoteThumbnail was unable to decode image with Intervention: {$e}");
  311. // No need to interrupt processing
  312. }
  313. }
  314. } else {
  315. throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' .
  316. ($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath);
  317. }
  318. } catch (AlreadyFulfilledException $e) {
  319. // Carry on
  320. } catch (Exception $err) {
  321. common_log(LOG_ERR, 'Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail ' .
  322. "but encountered error: {$err}");
  323. throw $err;
  324. } finally {
  325. unset($imgData);
  326. }
  327. return [$filename, $filehash, $width, $height];
  328. }
  329. /**
  330. * @param mixed $url
  331. *
  332. * @throws ServerException if check is made but fails
  333. *
  334. * @return bool false on no check made, provider name on success
  335. */
  336. protected function checkWhitelist($url)
  337. {
  338. if (!$this->check_whitelist) {
  339. return false; // indicates "no check made"
  340. }
  341. $host = parse_url($url, PHP_URL_HOST);
  342. foreach ($this->domain_whitelist as $regex => $provider) {
  343. if (preg_match("/{$regex}/", $host)) {
  344. return $provider; // we trust this source, return provider name
  345. }
  346. }
  347. throw new ServerException(sprintf(_m('Domain not in remote thumbnail source whitelist: %s'), $host));
  348. }
  349. /**
  350. * Event raised when GNU social polls the plugin for information about it.
  351. * Adds this plugin's version information to $versions array
  352. *
  353. * @param &$versions array inherited from parent
  354. *
  355. * @return bool true hook value
  356. */
  357. public function onPluginVersion(array &$versions): bool
  358. {
  359. $versions[] = ['name' => 'StoreRemoteMedia',
  360. 'version' => self::PLUGIN_VERSION,
  361. 'author' => 'Mikael Nordfeldth, Diogo Peralta Cordeiro',
  362. 'homepage' => GNUSOCIAL_ENGINE_URL,
  363. 'description' => // TRANS: Plugin description.
  364. _m('Plugin for downloading remotely attached files to local server.'), ];
  365. return true;
  366. }
  367. }