Embed.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. <?php
  2. declare(strict_types = 1);
  3. // {{{ License
  4. // This file is part of GNU social - https://www.gnu.org/software/social
  5. //
  6. // GNU social is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // GNU social is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  18. // }}}
  19. /**
  20. * OEmbed and OpenGraph implementation for GNU social
  21. *
  22. * @package GNUsocial
  23. *
  24. * @author Mikael Nordfeldth
  25. * @author Stephen Paul Weber
  26. * @author hannes
  27. * @author Mikael Nordfeldth
  28. * @author Miguel Dantas
  29. * @author Hugo Sales <hugo@hsal.es>
  30. * @author Diogo Peralta Cordeiro <mail@diogo.site>
  31. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  32. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  33. */
  34. namespace Plugin\Embed;
  35. use App\Core\Cache;
  36. use App\Core\DB\DB;
  37. use App\Core\Event;
  38. use App\Core\GSFile;
  39. use App\Core\HTTPClient;
  40. use function App\Core\I18n\_m;
  41. use App\Core\Log;
  42. use App\Core\Modules\Plugin;
  43. use App\Core\Router\RouteLoader;
  44. use App\Core\Router\Router;
  45. use App\Entity\Note;
  46. use App\Util\Common;
  47. use App\Util\Exception\ClientException;
  48. use App\Util\Exception\DuplicateFoundException;
  49. use App\Util\Exception\NotFoundException;
  50. use App\Util\Exception\ServerException;
  51. use App\Util\Formatting;
  52. use App\Util\TemporaryFile;
  53. use Component\Attachment\Entity\Attachment;
  54. use Component\Link\Entity\Link;
  55. use Embed\Embed as LibEmbed;
  56. use Exception;
  57. use Symfony\Component\HttpFoundation\Request;
  58. use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
  59. use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
  60. use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
  61. use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;
  62. /**
  63. * Base class for the Embed plugin that does most of the heavy lifting to get
  64. * and display representations for remote content.
  65. *
  66. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  67. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  68. */
  69. class Embed extends Plugin
  70. {
  71. public function version(): string
  72. {
  73. return '3.0.1';
  74. }
/**
 * Settings which can be set in social.local.yaml
 * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
 *
 * $check_whitelist / $check_blacklist switch the respective host check in
 * allowedLink() on; with both left at false every URL is accepted there.
 */
public bool $check_whitelist = false;
public bool $check_blacklist = false;
// Host patterns consulted by allowedLink() when $check_whitelist is true;
// each pattern is wrapped in "/" delimiters before being matched against the URL's host
public array $domain_whitelist = [
// hostname
'.*', // Default to allowing any host
];
// Host patterns consulted by allowedLink() when $check_blacklist is true
public array $domain_blacklist = [];
// Whether to maintain a copy of the original media or only a thumbnail of it
public bool $store_image = true;
// NOTE(review): the four settings below have no default value, so they are uninitialized
// until something assigns them, and they are not read in the visible part of this class.
// Presumably they are filled in from configuration by the plugin loader — TODO confirm.
public ?int $thumbnail_width;
public ?int $thumbnail_height;
public ?int $max_size;
public ?bool $smart_crop;
// TODO: storeThumbs setting
  93. private function getMaxFileSize(): int
  94. {
  95. return min(Common::config('plugin_embed', 'max_file_size'), Common::config('attachments', 'file_quota'));
  96. }
  97. /**
  98. * This code executes when GNU social creates the page routing, and we hook
  99. * on this event to add our action handler for Embed.
  100. *
  101. * @param RouteLoader $m the router that was initialized
  102. *
  103. * @throws Exception
  104. */
  105. public function onAddRoute(RouteLoader $m): bool
  106. {
  107. $m->connect('oembed', 'main/oembed', Controller\OEmbed::class);
  108. return Event::next;
  109. }
  110. /**
  111. * Insert oembed and opengraph tags in all HTML head elements
  112. */
  113. public function onShowHeadElements(Request $request, array &$result): bool
  114. {
  115. $matches = [];
  116. preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);
  117. $url = match ($matches[1]) {
  118. 'attachment' => "{$matches[1]}/{$matches[2]}",
  119. default => null,
  120. };
  121. if (\is_null($url)) {
  122. foreach (['xml', 'json'] as $format) {
  123. $result[] = [
  124. 'link' => [
  125. 'rel' => 'alternate',
  126. 'type' => "application/{$format}+oembed",
  127. 'href' => Router::url('oembed', ['format' => $format, 'url' => $url]),
  128. 'title' => 'oEmbed',
  129. ], ];
  130. }
  131. }
  132. return Event::next;
  133. }
  134. /**
  135. * Show this attachment enhanced with the corresponding Embed data, if available
  136. */
  137. public function onViewLink(array $vars, array &$res): bool
  138. {
  139. $link = $vars['link'];
  140. try {
  141. $embed = Cache::get(
  142. 'attachment-embed-' . $link->getId(),
  143. fn () => DB::findOneBy('attachment_embed', ['link_id' => $link->getId()]),
  144. );
  145. } catch (DuplicateFoundException $e) {
  146. Log::warning($e->getMessage());
  147. return Event::next;
  148. } catch (NotFoundException) {
  149. Log::debug("Embed doesn't have a representation for the link id={$link->getId()}. Must have been stored before the plugin was enabled.");
  150. return Event::next;
  151. }
  152. $attributes = $embed->getImageHTMLAttributes();
  153. $res[] = Formatting::twigRenderFile(
  154. 'embed/embedView.html.twig',
  155. ['embed' => $embed, 'attributes' => $attributes, 'link' => $link, 'note' => $vars['note']],
  156. );
  157. return Event::stop;
  158. }
  159. /**
  160. * Save embedding information for an Attachment, if applicable.
  161. *
  162. * @throws DuplicateFoundException
  163. */
  164. public function onNewLinkFromNote(Link $link, Note $note): bool
  165. {
  166. // Only handle text mime
  167. $mimetype = $link->getMimetype();
  168. if (\is_null($mimetype) || !(Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml'))) {
  169. return Event::next;
  170. }
  171. // Ignore if already handled
  172. $attachment_embed = DB::find('attachment_embed', ['link_id' => $link->getId()]);
  173. if (!\is_null($attachment_embed)) {
  174. return Event::next;
  175. }
  176. // If an attachment already exist, do not create an Embed for it. Some other plugin must have done things
  177. $attachment_to_link = DB::find('attachment_to_link', ['link_id' => $link->getId()]);
  178. if (!\is_null($attachment_to_link)) {
  179. $attachment_id = $attachment_to_link->getAttachmentId();
  180. try {
  181. $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
  182. $attachment->livesIncrementAndGet();
  183. return Event::next;
  184. } catch (DuplicateFoundException|NotFoundException $e) {
  185. Log::error($e->getMessage(), context: [$e]);
  186. }
  187. }
  188. // Create an Embed representation for this URL
  189. $embed_data = $this->getEmbedLibMetadata($link->getUrl());
  190. $embed_data['link_id'] = $link->getId();
  191. $img_data = $this->downloadThumbnail($embed_data['thumbnail_url']);
  192. switch ($img_data) {
  193. case null: // URL isn't usable
  194. $embed_data['thumbnail_url'] = null;
  195. // no break
  196. case false: // Thumbnail isn't acceptable
  197. DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
  198. Event::handle('AttachmentStoreNew', [&$attachment]);
  199. break;
  200. default: // String is valid image data
  201. $temp_file = new TemporaryFile();
  202. $temp_file->write($img_data);
  203. try {
  204. $attachment = GSFile::storeFileAsAttachment($temp_file);
  205. $embed_data['attachment_id'] = $attachment->getId();
  206. } catch (ClientException) {
  207. DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
  208. Event::handle('AttachmentStoreNew', [&$attachment]);
  209. }
  210. }
  211. $embed_data['attachment_id'] = $attachment->getId();
  212. DB::persist(Entity\AttachmentEmbed::create($embed_data));
  213. DB::flush();
  214. return Event::stop;
  215. }
  216. /**
  217. * @return bool true if allowed by the lists, false otherwise
  218. */
  219. private function allowedLink(string $url): bool
  220. {
  221. $passed_whitelist = !$this->check_whitelist;
  222. $passed_blacklist = !$this->check_blacklist;
  223. if ($this->check_whitelist) {
  224. $passed_whitelist = false; // don't trust be default
  225. $host = parse_url($url, \PHP_URL_HOST);
  226. foreach ($this->domain_whitelist as $regex => $provider) {
  227. if (preg_match("/{$regex}/", $host)) {
  228. $passed_whitelist = true; // we trust this source
  229. }
  230. }
  231. }
  232. if ($this->check_blacklist) {
  233. // assume it passed by default
  234. $host = parse_url($url, \PHP_URL_HOST);
  235. foreach ($this->domain_blacklist as $regex => $provider) {
  236. if (preg_match("/{$regex}/", $host)) {
  237. $passed_blacklist = false; // we blocked this source
  238. }
  239. }
  240. }
  241. return $passed_whitelist && $passed_blacklist;
  242. }
  243. /**
  244. * Perform an oEmbed or OpenGraph lookup for the given $url.
  245. *
  246. * Some known hosts are whitelisted with API endpoints where we
  247. * know they exist but autodiscovery data isn't available.
  248. *
  249. * Throws exceptions on failure.
  250. */
  251. private function getEmbedLibMetadata(string $url): array
  252. {
  253. Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
  254. $embed = new LibEmbed();
  255. $info = $embed->get($url);
  256. $metadata['title'] = $info->title;
  257. $metadata['description'] = $info->description;
  258. $metadata['author_name'] = $info->authorName;
  259. $root_url = parse_url($url);
  260. $root_url = "{$root_url['scheme']}://{$root_url['host']}";
  261. $metadata['author_url'] = $info->authorUrl ? (string) $info->authorUrl : $root_url;
  262. $metadata['provider_name'] = $info->providerName;
  263. $metadata['provider_url'] = (string) ($info->providerUrl ?? $metadata['author_name']);
  264. if (!\is_null($info->image)) {
  265. $thumbnail_url = (string) $info->image;
  266. } else {
  267. $thumbnail_url = (string) $info->favicon;
  268. }
  269. // Check thumbnail URL validity
  270. $metadata['thumbnail_url'] = $thumbnail_url;
  271. return self::normalizeEmbedLibMetadata($metadata);
  272. }
  273. /**
  274. * Normalize fetched info.
  275. */
  276. private static function normalizeEmbedLibMetadata(array $metadata): array
  277. {
  278. if (isset($metadata['thumbnail_url'])) {
  279. // sometimes sites serve the path, not the full URL, for images
  280. // let's "be liberal in what you accept from others"!
  281. // add protocol and host if the thumbnail_url starts with /
  282. if ($metadata['thumbnail_url'][0] == '/') {
  283. $metadata['thumbnail_url'] = "{$metadata['provider_url']}{$metadata['thumbnail_url']}";
  284. }
  285. // Some wordpress opengraph implementations sometimes return a white blank image
  286. // no need for us to save that!
  287. if ($metadata['thumbnail_url'] == 'https://s0.wp.com/i/blank.jpg') {
  288. $metadata['thumbnail_url'] = null;
  289. }
  290. }
  291. return $metadata;
  292. }
  293. /**
  294. * Private helper that:
  295. * - checks if given URL is valid and is in fact an image (basic test), returns null if not;
  296. * - checks if respects file quota and whitelist/blacklist, returns false if not;
  297. * - downloads the thumbnail, returns a string if successful.
  298. *
  299. * @param string $url URL to the remote thumbnail
  300. */
  301. private function downloadThumbnail(string $url): bool|string|null
  302. {
  303. // Is this a valid URL?
  304. if (!Common::isValidHttpUrl($url)) {
  305. Log::debug("Invalid URL ({$url}) in Embed->downloadThumbnail.");
  306. return null;
  307. }
  308. // Is this URL trusted?
  309. if (!$this->allowedLink($url)) {
  310. Log::info("Blocked URL ({$url}) in Embed->downloadThumbnail.");
  311. return false;
  312. }
  313. // Validate if the URL really does point to a remote image
  314. $head = HTTPClient::head($url);
  315. try {
  316. $headers = $head->getHeaders();
  317. } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
  318. Log::debug('Embed->downloadThumbnail@HTTPHead->getHeaders: ' . $e->getMessage(), [$e]);
  319. return null;
  320. }
  321. if (empty($headers['content-type']) || GSFile::mimetypeMajor($headers['content-type'][0]) !== 'image') {
  322. Log::debug("URL ({$url}) doesn't point to an image (content-type: " . (!empty($headers['content-type'][0]) ? $headers['content-type'][0] : 'not available') . ') in Embed->downloadThumbnail.');
  323. return null;
  324. }
  325. // Does it respect the file quota?
  326. $file_size = $headers['content-length'][0] ?? null;
  327. $max_size = $this->getMaxFileSize();
  328. if (\is_null($file_size) || $file_size > $max_size) {
  329. Log::debug("Went to download remote thumbnail of size {$file_size} but the plugin's filesize limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
  330. return false;
  331. }
  332. // Download and return the file
  333. Log::debug("Downloading remote thumbnail from URL: {$url} in Embed->downloadThumbnail.");
  334. return HTTPClient::get($url)->getContent();
  335. }
  336. public function onAttachmentGetBestTitle(Attachment $attachment, Note $note, ?string &$title)
  337. {
  338. try {
  339. $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
  340. $title = $embed->getTitle();
  341. return Event::stop;
  342. } catch (NotFoundException) {
  343. }
  344. return Event::next;
  345. }
  346. /**
  347. * Event raised when GNU social polls the plugin for information about it.
  348. * Adds this plugin's version information to $versions array
  349. *
  350. * @param array $versions inherited from parent
  351. *
  352. * @throws ServerException
  353. *
  354. * @return bool true hook value
  355. */
  356. public function onPluginVersion(array &$versions): bool
  357. {
  358. $versions[] = [
  359. 'name' => 'Embed',
  360. 'version' => $this->version(),
  361. 'author' => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
  362. 'homepage' => GNUSOCIAL_PROJECT_URL,
  363. 'description', // TRANS: Plugin description. => _m('Plugin for using and representing oEmbed, OpenGraph and other data.'),
  364. ];
  365. return Event::next;
  366. }
  367. }