Embed.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. <?php
  2. // {{{ License
  3. // This file is part of GNU social - https://www.gnu.org/software/social
  4. //
  5. // GNU social is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU Affero General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // GNU social is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU Affero General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  17. // }}}
  18. /**
  19. * OEmbed and OpenGraph implementation for GNU social
  20. *
  21. * @package GNUsocial
  22. *
  23. * @author Mikael Nordfeldth
  24. * @author Stephen Paul Weber
  25. * @author hannes
  26. * @author Mikael Nordfeldth
  27. * @author Miguel Dantas
  28. * @author Diogo Peralta Cordeiro <mail@diogo.site>
  29. * @author Hugo Sales <hugo@hsal.es>
  30. *
  31. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  32. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  33. */
  34. namespace Plugin\Embed;
use App\Core\Cache;
use App\Core\DB\DB;
use App\Core\Event;
use App\Core\GSFile;
use App\Core\HTTPClient;
use App\Core\Log;
use App\Core\Modules\Plugin;
use App\Core\Router\RouteLoader;
use App\Core\Router\Router;
use App\Entity\Attachment;
use App\Entity\AttachmentThumbnail;
use App\Util\Common;
use App\Util\Exception\DuplicateFoundException;
use App\Util\Exception\NotFoundException;
use App\Util\Exception\ServerException;
use App\Util\Formatting;
use App\Util\TemporaryFile;
use Embed\Embed as LibEmbed;
use Exception;
use Symfony\Component\HttpFoundation\Request;
  54. /**
  55. * Base class for the Embed plugin that does most of the heavy lifting to get
  56. * and display representations for remote content.
  57. *
  58. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  59. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  60. */
  61. class Embed extends Plugin
  62. {
  /**
   * Settings which can be set in social.local.yaml
   * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
   *
   * Maps a hostname regular expression to a service provider name.
   * checkAllowlist() wraps each key in '/' delimiters, matches it against
   * the host of a URL and returns the mapped provider name on the first hit.
   */
  public $domain_allowlist = [
      // hostname => service provider
      '.*' => '', // Default to allowing any host
  ];
  /**
   * Register this plugin's routes while GNU social builds its routing table.
   *
   * Both the 'oembed' and the 'embed' route ids are connected to the same
   * controller class, Controller\Embed.
   *
   * @param RouteLoader $m the route loader the routes are connected to
   *
   * @return bool always Event::next
   */
  public function onAddRoute(RouteLoader $m): bool
  {
      $routes = [
          'oembed' => 'main/oembed',
          'embed'  => 'main/embed',
      ];
      foreach ($routes as $id => $path) {
          $m->connect($id, $path, Controller\Embed::class);
      }
      return Event::next;
  }
  /**
   * Insert oEmbed discovery <link> tags in the HTML head of attachment pages.
   *
   * The request path is split into its first segment and the remainder. Only
   * paths whose first segment is 'attachment' get links; one entry per
   * supported format (xml, json) is appended to $result.
   *
   * @param Request $request the current request, only its path info is read
   * @param array   $result  list of head elements, appended to in place
   *
   * @return mixed always Event::next
   */
  public function onShowHeadElements(Request $request, array &$result)
  {
      $matches = [];
      // Paths without any non-slash segment (e.g. "/") do not match at all;
      // bail out instead of reading an undefined $matches[1].
      if (preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches) !== 1) {
          return Event::next;
      }
      if ($matches[1] !== 'attachment') {
          return Event::next;
      }

      $url = "{$matches[1]}/{$matches[2]}";
      foreach (['xml', 'json'] as $format) {
          $result[] = [
              'link' => [
                  'rel'   => 'alternate',
                  'type'  => "application/{$format}+oembed",
                  'href'  => Router::url('embed', ['format' => $format, 'url' => $url]),
                  'title' => 'oEmbed',
              ],
          ];
      }
      return Event::next;
  }
  /**
   * Store embed information for a newly saved Attachment, when applicable.
   *
   * Nothing is done if an attachment_embed row already exists. Otherwise,
   * for attachments that have a remote URL and an HTML/XHTML mimetype, the
   * embed data is fetched with getEmbed() and persisted. Any Exception raised
   * while fetching or persisting is logged as a warning and swallowed.
   *
   * @param Attachment $attachment the newly inserted Attachment object
   *
   * @return bool always Event::next
   */
  public function onAttachmentStoreNew(Attachment $attachment): bool
  {
      try {
          DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
          // A row already exists, nothing left to do
          return Event::next;
      } catch (NotFoundException) {
          // Expected for a new attachment: carry on below and try to create the row
      } catch (DuplicateFoundException) {
          Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");
          return Event::next;
      }

      if (!$attachment->hasRemoteUrl() || !$attachment->hasMimetype()) {
          return Event::next;
      }

      $type    = $attachment->getMimetype();
      $is_html = Formatting::startsWith($type, 'text/html')
          || Formatting::startsWith($type, 'application/xhtml+xml');
      if (!$is_html) {
          return Event::next;
      }

      try {
          $fields                  = $this->getEmbed($attachment->getRemoteUrl(), $attachment);
          $fields['attachment_id'] = $attachment->getId();
          DB::persist(Entity\AttachmentEmbed::create($fields));
          DB::flush();
      } catch (Exception $e) {
          Log::warning($e);
      }
      return Event::next;
  }
  /**
   * Replace the enclosure representation of an attachment with embed data.
   *
   * When no attachment_embed row exists for the id, $enclosure is left alone
   * and the event continues. When a row exists, this plugin 'owns' the
   * attachment: $enclosure becomes null if the embed has no image, or an array
   * describing the embedded image otherwise, and the event is stopped.
   *
   * @param int        $attachment_id id of the attachment being described
   * @param array|null $enclosure     enclosure data, overwritten in place
   *
   * @return mixed Event::next when there is no embed, Event::stop otherwise
   */
  public function onAttachmentFileInfo(int $attachment_id, ?array &$enclosure)
  {
      try {
          $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment_id]);
      } catch (NotFoundException) {
          return Event::next;
      }

      $enclosure = $embed->isImage()
          ? [
              'filepath' => $embed->getFilepath(),
              'mimetype' => $embed->getMimetype(),
              'title'    => $embed->getTitle(),
              'width'    => $embed->getWidth(),
              'height'   => $embed->getHeight(),
              'url'      => $embed->getMediaUrl(),
          ]
          : null; // known attachment, but it doesn't have an image
      return Event::stop;
  }
  /**
   * Show this attachment enhanced with the corresponding Embed data, if available.
   *
   * The attachment_embed row is looked up through the cache. When there is no
   * row, or the row has neither an author name nor a provider, the event
   * continues untouched. Otherwise an h-entry <article> rendered from the
   * embed data is appended to $res and the event is stopped.
   *
   * @param Attachment $attachment the attachment being shown
   * @param array      $res        list of rendered HTML fragments, appended to in place
   *
   * @return mixed Event::stop when a representation was added, Event::next otherwise
   */
  public function onShowAttachment(Attachment $attachment, array &$res)
  {
      try {
          $embed = Cache::get(
              'attachment-embed-' . $attachment->getId(),
              fn () => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()])
          );
      } catch (DuplicateFoundException $e) {
          Log::warning($e);
          return Event::next;
      } catch (NotFoundException) {
          return Event::next;
      }

      // A null embed must short-circuit before any getter is called on it
      if (is_null($embed) || (empty($embed->getAuthorName()) && empty($embed->getProvider()))) {
          Log::debug('Embed doesn\'t have a representation for the attachment #' . $attachment->getId());
          return Event::next;
      }

      $attributes = $embed->getImageHTMLAttributes(['class' => 'u-photo embed']);
      $res[]      = Formatting::twigRenderString(<<<END
      <article class="h-entry embed">
      <header>
      {% if attributes != false %}
      <img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" />
      {% endif %}
      <h5 class="p-name embed">
      <a class="u-url" href="{{attachment.getRemoteUrl()}}">{{embed.getTitle() | escape}}</a>
      </h5>
      <div class="p-author embed">
      {% if embed.getAuthorName() is not null %}
      <div class="fn vcard author">
      {% if embed.getAuthorUrl() is null %}
      <p>{{embed.getAuthorName()}}</p>
      {% else %}
      <a href="{{embed.getAuthorUrl()}}" class="url">{{embed.getAuthorName()}}</a>
      {% endif %}
      </div>
      {% endif %}
      {% if embed.getProvider() is not null %}
      <div class="fn vcard">
      {% if embed.getProviderUrl() is null %}
      <p>{{embed.getProvider()}}</p>
      {% else %}
      <a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProvider()}}</a>
      {% endif %}
      </div>
      {% endif %}
      </div>
      </header>
      <div class="p-summary embed">
      {{ embed.getHtml() | escape }}
      </div>
      </article>
      END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachment]);
      return Event::stop;
  }
  /**
   * Check the host of $url against the configured domain allowlist.
   *
   * The check only runs when the `check_allowlist` setting is truthy. Each
   * key of $domain_allowlist is used as a regular expression (wrapped in '/'
   * delimiters here) and matched against the URL's host; the value of the
   * first matching entry is returned.
   *
   * @param string $url the URL whose host is checked
   *
   * @return bool|string false when no check was made, the provider name on success
   *
   * @throws ServerException if the check is made and no allowlist entry matches
   */
  protected function checkAllowlist(string $url): string|bool
  {
      // Checking is opt-in: with the setting absent or falsy nothing is verified
      if (!($this->check_allowlist ?? false)) {
          return false; // indicates "no check made"
      }

      // parse_url() yields null/false for URLs without a usable host
      $host = parse_url($url, PHP_URL_HOST) ?: '';
      foreach ($this->domain_allowlist as $regex => $provider) {
          if (preg_match("/{$regex}/", $host) === 1) {
              return $provider; // we trust this source, return provider name
          }
      }

      // NOTE(review): _m() is not imported in the visible `use` block — confirm it resolves from this namespace
      throw new ServerException(_m('Domain not in remote thumbnail source allowlist: {host}', ['host' => $host]));
  }
  /**
   * Determine the size of a remote file from its Content-Length header.
   *
   * When no headers are supplied, a HEAD request is made to $url first. This
   * isn't 100% foolproof but is reliable enough for our purposes.
   *
   * @param string     $url     URL of the remote file
   * @param array|null $headers headers of a request already made to $url,
   *                            looked up with lower-case keys
   *
   * @return int|null the advertised size, or null when it can't be determined
   *                  (invalid URL, missing or non-numeric header, request failure)
   */
  private function getRemoteFileSize(string $url, ?array $headers = null): ?int
  {
      try {
          if ($headers === null) {
              if (!Common::isValidHttpUrl($url)) {
                  Log::error('Invalid URL in Embed::getRemoteFileSize()');
                  return null;
              }
              $head    = HTTPClient::head($url);
              $headers = array_change_key_case($head->getHeaders(), CASE_LOWER);
          }
          // Header values arrive as strings; only hand back a real integer
          $length = $headers['content-length'][0] ?? null;
          return is_numeric($length) ? (int) $length : null;
      } catch (Exception $e) {
          Log::error($e);
          return null;
      }
  }
  /**
   * Tell whether a remote URL serves an image, judging by its Content-Type header.
   *
   * When no headers are supplied, a HEAD request is made to $url first.
   *
   * @param string     $url     URL of the remote resource
   * @param array|null $headers headers of a request already made to $url,
   *                            looked up with lower-case keys
   *
   * @return bool true if the major mimetype is 'image'; false otherwise,
   *              including for an invalid URL or a failed request
   */
  private function isRemoteImage(string $url, ?array $headers = null): bool
  {
      try {
          if ($headers === null) {
              if (!Common::isValidHttpUrl($url)) {
                  Log::error('Invalid URL in Embed::isRemoteImage()');
                  return false;
              }
              $head    = HTTPClient::head($url);
              $headers = array_change_key_case($head->getHeaders(), CASE_LOWER);
          }
          return !empty($headers['content-type'])
              && GSFile::mimetypeMajor($headers['content-type'][0]) === 'image';
      } catch (Exception $e) {
          Log::error($e);
          return false;
      }
  }
  /**
   * Write $imgData to a temporary file, hash it and hand it to the image resizer.
   *
   * The destination path is built from the storage dir, the hash obtained
   * through the 'HashFile' event and the configured thumbnail extension.
   * Width, height and mimetype are passed by reference to the
   * 'ResizeImagePath' event, so its handlers may update them.
   *
   * @param mixed      $imgData the image data to validate
   * @param array|null $headers headers of a previous request for the image; a
   *                            content-disposition filename is extracted if present
   *
   * @return array [filepath, width, height, original name or null, mimetype]
   */
  protected function validateAndWriteImage($imgData, ?array $headers = null): array
  {
      $tmp = new TemporaryFile();
      $tmp->write($imgData);

      $hash = null;
      Event::handle('HashFile', [$tmp->getRealPath(), &$hash]);
      $filepath = Common::config('storage', 'dir') . "embed/{$hash}" . Common::config('thumbnail', 'extension');

      $width      = Common::config('plugin_embed', 'width');
      $height     = Common::config('plugin_embed', 'height');
      $smart_crop = Common::config('plugin_embed', 'smart_crop');
      $mimetype   = null;
      Event::handle('ResizeImagePath', [$tmp->getRealPath(), $filepath, &$width, &$height, $smart_crop, &$mimetype]);
      unset($tmp);

      $original_name = null;
      if ($headers !== null && array_key_exists('content-disposition', $headers)) {
          $found = [];
          if (preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'][0], $found) === 1) {
              $original_name = $found[1];
          }
      }
      return [$filepath, $width, $height, $original_name, $mimetype];
  }
  /**
   * Fetch, validate and write the image found at $media_url.
   *
   * A 'file://' URL is inspected in place with getimagesize(). Any other URL
   * is checked against the allowlist, probed with a HEAD request (must be an
   * image and must not advertise a size above the attachment file quota),
   * then downloaded and passed to validateAndWriteImage().
   *
   * @param Attachment $attachment the attachment the image belongs to
   * @param string     $media_url  URL for the actual media representation
   *
   * @return array|bool [filepath, width, height, original name, mimetype],
   *                    or false when no usable image was found
   *
   * @throws Exception when a file already exists for the attachment, or the
   *                   advertised size is above the quota
   */
  protected function fetchValidateWriteRemoteImage(Attachment $attachment, string $media_url): array|bool
  {
      if ($attachment->hasFilename() && file_exists($attachment->getPath())) {
          // NOTE(review): AlreadyFulfilledException is not imported in the visible `use` block — confirm it resolves
          throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->getId()]));
      }

      if (Formatting::startsWith($media_url, 'file://')) {
          // Local file: report it as-is, every element of the result comes from the file itself
          $filepath = Formatting::removePrefix($media_url, 'file://');
          $info     = getimagesize($filepath);
          if ($info === false) {
              Log::debug("Local file is not a readable image: {$filepath}");
              return false;
          }
          return [$filepath, $info[0], $info[1], basename($filepath), $info['mime']];
      }

      $this->checkAllowlist($media_url);
      $head    = HTTPClient::head($media_url);
      $headers = array_change_key_case($head->getHeaders(), CASE_LOWER);

      try {
          if (!$this->isRemoteImage($media_url, $headers)) {
              return false;
          }
          $file_size = $this->getRemoteFileSize($media_url, $headers);
          $max_size  = Common::config('attachments', 'file_quota');
          // An unknown size (null/false/0) is let through; only a known oversize aborts
          if (!empty($file_size) && $file_size > $max_size) {
              throw new \Exception("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted.");
          }
      } catch (Exception $err) {
          Log::debug('Could not determine size of remote image, aborted local storage.');
          throw $err;
      }

      // First we download the file to memory and test whether it's actually an image file
      Log::debug('Downloading remote thumbnail for file id==' . $attachment->getId() . " with thumbnail URL: {$media_url}");
      try {
          $imgData = HTTPClient::get($media_url)->getContent();
          if ($imgData === null || $imgData === '') {
              Log::debug('HTTPClient returned an empty result');
              return false;
          }
          return $this->validateAndWriteImage($imgData, $headers);
      } catch (UnsupportedMediaException $e) {
          // NOTE(review): UnsupportedMediaException is not imported in the visible `use` block — confirm it resolves
          // Couldn't find anything that looks like an image, nothing to do
          Log::debug($e);
          return false;
      }
  }
  /**
   * Perform an embed metadata lookup for the given $url using 'oscarotero/Embed'.
   *
   * Title, description, author and provider are copied into the result. When
   * the page advertises an image, it is stored (inline base64 data URIs are
   * decoded, anything else is fetched) and its dimensions, mimetype, URL and
   * stored filename are added.
   *
   * An Exception raised during the lookup is logged and not rethrown; the
   * metadata gathered up to that point (possibly none) is still normalized
   * and returned. If a title entry was collected it is also set on $attachment.
   *
   * @param string     $url        the remote URL to look up
   * @param Attachment $attachment the attachment the URL belongs to
   *
   * @return array the normalized metadata
   */
  public function getEmbed(string $url, Attachment $attachment): array
  {
      Log::info('Checking for remote URL metadata for ' . $url);

      // Start from an empty set so a failed lookup still yields a valid array
      $metadata = [];
      try {
          Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
          $embed = new LibEmbed();
          $info  = $embed->get($url);

          $metadata['title']         = $info->title;
          $metadata['html']          = $info->description;
          $metadata['author_name']   = $info->authorName;
          $metadata['author_url']    = $info->authorUrl;
          $metadata['provider_name'] = $info->providerName;
          $metadata['provider_url']  = $info->providerUrl;

          if (!is_null($info->image)) {
              $image_url = (string) $info->image;
              if (Formatting::startsWith($image_url, 'data')) {
                  // Inline image: only base64 payloads can be decoded here
                  $marker = stripos($image_url, 'base64,');
                  $image  = $marker === false
                      ? false
                      : $this->validateAndWriteImage(base64_decode(substr($image_url, $marker + 7)));
              } else {
                  $image = $this->fetchValidateWriteRemoteImage($attachment, $image_url);
              }

              // The fetch reports false when no usable image was found
              if (is_array($image)) {
                  [$filepath, $width, $height, , $mimetype] = $image;
                  $metadata['width']     = $width;
                  $metadata['height']    = $height;
                  $metadata['mimetype']  = $mimetype;
                  $metadata['media_url'] = $image_url;
                  $metadata['filename']  = Formatting::removePrefix($filepath, Common::config('storage', 'dir'));
              }
          }
      } catch (Exception $e) {
          Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . $e->getMessage());
      }

      $metadata = self::normalize($metadata);
      if (array_key_exists('title', $metadata)) {
          $attachment->setTitle($metadata['title']);
      }
      return $metadata;
  }
  /**
   * Normalize fetched info.
   *
   * Only acts when $data has a non-null 'url' entry:
   *  - a protocol-relative URL ('//host/path') gets an 'https:' scheme;
   *  - the known blank WordPress OpenGraph image is replaced by null;
   *  - missing 'width' (and with it 'height') default to the plugin's configured size.
   *
   * @param array $data the fetched metadata
   *
   * @return array the normalized metadata
   */
  public static function normalize(array $data): array
  {
      if (!isset($data['url'])) {
          return $data;
      }

      // sometimes sites serve the path, not the full URL, for images
      // let's "be liberal in what you accept from others"!
      if (str_starts_with((string) $data['url'], '//')) {
          $data['url'] = "https:{$data['url']}";
      }
      // NOTE(review): a host-relative path ('/img.png') carries no scheme or host
      // and no base URL is available here, so it is left untouched

      // Some wordpress opengraph implementations sometimes return a white blank image
      // no need for us to save that!
      if ($data['url'] === 'https://s0.wp.com/i/blank.jpg') {
          $data['url'] = null;
      }

      if (!isset($data['width'])) {
          $data['width']  = Common::config('plugin_embed', 'width');
          $data['height'] = Common::config('plugin_embed', 'height');
      }
      return $data;
  }
  451. }