Embed.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. <?php
  2. declare(strict_types = 1);
  3. // {{{ License
  4. // This file is part of GNU social - https://www.gnu.org/software/social
  5. //
  6. // GNU social is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // GNU social is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  18. // }}}
  19. /**
  20. * OEmbed and OpenGraph implementation for GNU social
  21. *
  22. * @package GNUsocial
  23. *
  24. * @author Mikael Nordfeldth
  25. * @author Stephen Paul Weber
  26. * @author hannes
  27. * @author Mikael Nordfeldth
  28. * @author Miguel Dantas
  29. * @author Hugo Sales <hugo@hsal.es>
  30. * @author Diogo Peralta Cordeiro <mail@diogo.site>
  31. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  32. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  33. */
  34. namespace Plugin\Embed;
  35. use App\Core\Cache;
  36. use App\Core\DB\DB;
  37. use App\Core\Event;
  38. use App\Core\GSFile;
  39. use App\Core\HTTPClient;
  40. use function App\Core\I18n\_m;
  41. use App\Core\Log;
  42. use App\Core\Modules\Plugin;
  43. use App\Core\Router\RouteLoader;
  44. use App\Core\Router\Router;
  45. use App\Entity\Note;
  46. use App\Util\Common;
  47. use App\Util\Exception\ClientException;
  48. use App\Util\Exception\DuplicateFoundException;
  49. use App\Util\Exception\NotFoundException;
  50. use App\Util\Exception\ServerException;
  51. use App\Util\Formatting;
  52. use App\Util\TemporaryFile;
  53. use Component\Attachment\Entity\Attachment;
  54. use Component\Link\Entity\Link;
  55. use Embed\Embed as LibEmbed;
  56. use Exception;
  57. use Symfony\Component\HttpFoundation\Request;
  58. use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
  59. use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
  60. use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
  61. use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;
  62. /**
  63. * Base class for the Embed plugin that does most of the heavy lifting to get
  64. * and display representations for remote content.
  65. *
  66. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  67. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  68. */
  class Embed extends Plugin
  {
      /**
       * Version string reported for this plugin (see onPluginVersion()).
       */
      public function version(): string
      {
          return '3.0.1';
      }

      /**
       * Settings which can be set in social.local.yaml
       * WARNING, the domain lists hold _regexps_ (slashes are added later).
       * Always escape your dots and anchor the end of each pattern with '$'.
       *
       * Each list may be written either as a plain list of patterns
       * (['^example\.org$']) or as a map of pattern => provider name
       * (['^example\.org$' => 'Example']).
       */
      public bool $check_whitelist = false;
      public bool $check_blacklist = false;
      public array $domain_whitelist = [
          // hostname
          '.*', // Default to allowing any host
      ];
      public array $domain_blacklist = [];
      // Whether to maintain a copy of the original media or only a thumbnail of it
      public bool $store_image = true;
      // Nullable settings default to null so that reading them before they are
      // configured never hits an uninitialized typed property
      public ?int $thumbnail_width  = null;
      public ?int $thumbnail_height = null;
      public ?int $max_size         = null;
      public ?bool $smart_crop      = null;

      /**
       * Value of the `store_image` setting.
       */
      private function getStoreImage(): bool
      {
          return $this->store_image;
      }

      /**
       * Maximum accepted size for a remote thumbnail: the plugin's own
       * `max_size` when set, otherwise the `attachments.file_quota` config value.
       */
      private function getMaxSize(): int
      {
          return $this->max_size ?? Common::config('attachments', 'file_quota');
      }

      /**
       * The plugin's own `smart_crop` when set, otherwise the
       * `thumbnail.smart_crop` config value.
       */
      private function getSmartCrop(): bool
      {
          return $this->smart_crop ?? Common::config('thumbnail', 'smart_crop');
      }

      /**
       * This code executes when GNU social creates the page routing, and we hook
       * on this event to add our action handler for Embed.
       *
       * @param RouteLoader $m the router that was initialized
       *
       * @throws Exception
       */
      public function onAddRoute(RouteLoader $m): bool
      {
          $m->connect('oembed', 'main/oembed', Controller\OEmbed::class);
          return Event::next;
      }

      /**
       * Insert oEmbed discovery links in the HTML head of attachment pages.
       *
       * The request path is split into its first segment and the remainder; only
       * paths whose first segment is `attachment` get an oEmbed URL, and one
       * `<link rel="alternate">` entry per supported format is appended to $result.
       *
       * @param array $result head elements collected so far, appended to in place
       */
      public function onShowHeadElements(Request $request, array &$result): bool
      {
          $matches = [];
          if (preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches) !== 1) {
              // No first path segment (e.g. "/"), so there is nothing to describe
              return Event::next;
          }
          $url = match ($matches[1]) {
              'attachment' => "{$matches[1]}/{$matches[2]}",
              default      => null,
          };
          // Only advertise oEmbed when we actually have a URL to describe
          if (!\is_null($url)) {
              foreach (['xml', 'json'] as $format) {
                  $result[] = [
                      'link' => [
                          'rel'   => 'alternate',
                          'type'  => "application/{$format}+oembed",
                          'href'  => Router::url('oembed', ['format' => $format, 'url' => $url]),
                          'title' => 'oEmbed',
                      ],
                  ];
              }
          }
          return Event::next;
      }

      /**
       * Show this link enhanced with the corresponding Embed data, if available.
       *
       * Looks up (through the cache) the `attachment_embed` row for the link in
       * $vars['link'] and appends the rendered `embed/embedView.html.twig`
       * template to $res. When no single row can be found, the event is passed on.
       *
       * @param array $vars must contain the link under the key 'link'
       * @param array $res  rendered fragments, appended to in place
       */
      public function onViewLink(array $vars, array &$res): bool
      {
          $link = $vars['link'];
          try {
              $embed = Cache::get(
                  'attachment-embed-' . $link->getId(),
                  fn () => DB::findOneBy('attachment_embed', ['link_id' => $link->getId()]),
              );
          } catch (DuplicateFoundException $e) {
              Log::warning($e->getMessage());
              return Event::next;
          } catch (NotFoundException) {
              Log::debug("Embed doesn't have a representation for the link id={$link->getId()}. Must have been stored before the plugin was enabled.");
              return Event::next;
          }
          $attributes = $embed->getImageHTMLAttributes();
          $res[]      = Formatting::twigRenderFile(
              'embed/embedView.html.twig',
              ['embed' => $embed, 'attributes' => $attributes, 'link' => $link],
          );
          return Event::stop;
      }

      /**
       * Save embedding information for a Link found in a Note, if applicable.
       *
       * Only HTML/XHTML links that have neither an embed nor an attachment yet
       * are handled. The page metadata is fetched, its thumbnail is downloaded
       * when possible, and an AttachmentEmbed pointing at the resulting
       * attachment is persisted.
       *
       * @throws DuplicateFoundException
       */
      public function onNewLinkFromNote(Link $link, Note $note): bool
      {
          // Only handle text mime
          $mimetype = $link->getMimetype();
          if (\is_null($mimetype) || !(Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml'))) {
              return Event::next;
          }

          // Ignore if already handled
          $attachment_embed = DB::find('attachment_embed', ['link_id' => $link->getId()]);
          if (!\is_null($attachment_embed)) {
              return Event::next;
          }

          // If an attachment already exist, do not create an Embed for it. Some other plugin must have done things
          $attachment_to_link = DB::find('attachment_to_link', ['link_id' => $link->getId()]);
          if (!\is_null($attachment_to_link)) {
              $attachment_id = $attachment_to_link->getAttachmentId();
              try {
                  $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
                  $attachment->livesIncrementAndGet();
                  return Event::next;
              } catch (DuplicateFoundException|NotFoundException $e) {
                  // The relation points at an unusable attachment; log and build our own below
                  Log::error($e);
              }
          }

          // Create an Embed representation for this URL
          $embed_data            = $this->getEmbedLibMetadata($link->getUrl());
          $embed_data['link_id'] = $link->getId();
          $img_data              = $this->downloadThumbnail($embed_data['thumbnail_url'] ?? null);

          // Strict type checks on purpose: a loose switch would treat `false`
          // and '' as equal to `null` and conflate the three outcomes
          $attachment = null;
          if (\is_string($img_data) && $img_data !== '') {
              // String is valid image data
              $temp_file = new TemporaryFile();
              $temp_file->write($img_data);
              try {
                  $attachment = GSFile::storeFileAsAttachment($temp_file);
              } catch (ClientException) {
                  // Storing was refused; fall back to a bare attachment below
              }
          } elseif ($img_data !== false) {
              // URL isn't usable (null or empty response), so forget it
              $embed_data['thumbnail_url'] = null;
          }
          // Otherwise ($img_data === false) the thumbnail isn't acceptable to
          // store locally, but its URL is kept in the embed data

          if (\is_null($attachment)) {
              DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
              Event::handle('AttachmentStoreNew', [&$attachment]);
          }

          $embed_data['attachment_id'] = $attachment->getId();
          DB::persist(Entity\AttachmentEmbed::create($embed_data));
          DB::flush();
          return Event::stop;
      }

      /**
       * Check a URL's host against the configured whitelist and blacklist.
       *
       * A list is only consulted when its `check_*` setting is enabled. When at
       * least one list is enabled and the URL has no parseable host, the URL is
       * rejected because it cannot be vetted.
       *
       * @return bool true if allowed by the lists, false otherwise
       */
      private function allowedLink(string $url): bool
      {
          if (!$this->check_whitelist && !$this->check_blacklist) {
              return true;
          }

          $host = parse_url($url, \PHP_URL_HOST);
          if (!\is_string($host) || $host === '') {
              return false;
          }

          // don't trust by default: an enabled whitelist must match
          $passed_whitelist = !$this->check_whitelist || $this->hostMatchesAny($host, $this->domain_whitelist);
          // assume it passed by default: an enabled blacklist must not match
          $passed_blacklist = !$this->check_blacklist || !$this->hostMatchesAny($host, $this->domain_blacklist);

          return $passed_whitelist && $passed_blacklist;
      }

      /**
       * Tell whether $host matches at least one pattern of a domain list.
       *
       * @param array $patterns either a list of regexps or a map of regexp => provider name
       */
      private function hostMatchesAny(string $host, array $patterns): bool
      {
          foreach ($patterns as $key => $value) {
              // In list form the regexp is the value, in map form it is the key
              $regex = \is_string($key) ? $key : (string) $value;
              if (preg_match("/{$regex}/", $host) === 1) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Look up title, description, author, provider and thumbnail information
       * for the given $url using the 'oscarotero/Embed' library.
       *
       * Missing author and provider URLs fall back to the scheme and host of
       * $url. The thumbnail is the page image when there is one, otherwise the
       * favicon. Exceptions raised by the library are not caught here.
       *
       * @return array metadata, already passed through normalizeEmbedLibMetadata()
       */
      private function getEmbedLibMetadata(string $url): array
      {
          Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
          $embed = new LibEmbed();
          $info  = $embed->get($url);

          $url_parts = parse_url($url);
          $url_parts = \is_array($url_parts) ? $url_parts : [];
          $root_url  = ($url_parts['scheme'] ?? 'https') . '://' . ($url_parts['host'] ?? '');

          $metadata = [
              'title'         => $info->title,
              'description'   => $info->description,
              'author_name'   => $info->authorName,
              'author_url'    => $info->authorUrl ? (string) $info->authorUrl : $root_url,
              'provider_name' => $info->providerName,
              // A `(string) $x ?? $y` fallback never triggers, because the cast
              // binds tighter than `??`; test the value before casting instead
              'provider_url' => $info->providerUrl ? (string) $info->providerUrl : $root_url,
          ];

          if (!\is_null($info->image)) {
              $thumbnail_url = (string) $info->image;
          } else {
              $thumbnail_url = (string) $info->favicon;
          }
          $metadata['thumbnail_url'] = $thumbnail_url;

          return self::normalizeEmbedLibMetadata($metadata);
      }

      /**
       * Normalize fetched info.
       *
       * Turns the `thumbnail_url` entry into either an absolute URL or null:
       * - a missing or empty value becomes null;
       * - a protocol-relative value ("//host/path") gets the provider URL's scheme;
       * - a path ("/path") is prefixed with the provider URL;
       * - the blank placeholder image served by wordpress is discarded.
       */
      private static function normalizeEmbedLibMetadata(array $metadata): array
      {
          $thumbnail_url = $metadata['thumbnail_url'] ?? null;
          if (!\is_string($thumbnail_url) || $thumbnail_url === '') {
              $metadata['thumbnail_url'] = null;
              return $metadata;
          }

          // sometimes sites serve the path, not the full URL, for images
          // let's "be liberal in what you accept from others"!
          $provider_url = (string) ($metadata['provider_url'] ?? '');
          if (str_starts_with($thumbnail_url, '//')) {
              $scheme        = parse_url($provider_url, \PHP_URL_SCHEME);
              $scheme        = \is_string($scheme) && $scheme !== '' ? $scheme : 'https';
              $thumbnail_url = "{$scheme}:{$thumbnail_url}";
          } elseif (str_starts_with($thumbnail_url, '/')) {
              // Avoid a double slash when the provider URL already ends with one
              $thumbnail_url = rtrim($provider_url, '/') . $thumbnail_url;
          }

          // Some wordpress opengraph implementations sometimes return a white blank image
          // no need for us to save that!
          if ($thumbnail_url === 'https://s0.wp.com/i/blank.jpg') {
              $thumbnail_url = null;
          }

          $metadata['thumbnail_url'] = $thumbnail_url;
          return $metadata;
      }

      /**
       * Private helper that:
       * - checks if given URL is valid and is in fact an image (basic test), returns null if not;
       * - checks if respects the size limit and whitelist/blacklist, returns false if not;
       * - downloads the thumbnail, returns a string if successful.
       *
       * HTTP failures during either the HEAD or the GET request are logged and
       * reported as null.
       *
       * @param null|string $url URL to the remote thumbnail
       */
      private function downloadThumbnail(?string $url): bool|string|null
      {
          // Is this a valid URL?
          if (\is_null($url) || !Common::isValidHttpUrl($url)) {
              Log::debug("Invalid URL ({$url}) in Embed->downloadThumbnail.");
              return null;
          }

          // Is this URL trusted?
          if (!$this->allowedLink($url)) {
              Log::info("Blocked URL ({$url}) in Embed->downloadThumbnail.");
              return false;
          }

          // Validate if the URL really does point to a remote image
          try {
              $headers = HTTPClient::head($url)->getHeaders();
          } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
              Log::debug('Embed->downloadThumbnail@HTTPHead->getHeaders: ' . $e->getMessage());
              return null;
          }
          $headers      = array_change_key_case($headers, \CASE_LOWER);
          $content_type = $headers['content-type'][0] ?? null;
          if (empty($content_type) || GSFile::mimetypeMajor($content_type) !== 'image') {
              Log::debug("URL ({$url}) doesn't point to an image (content-type: " . (!empty($content_type) ? $content_type : 'not available') . ') in Embed->downloadThumbnail.');
              return null;
          }

          // Does it respect the size limit? Header values are strings, so
          // compare numerically; an unknown size is treated as too large
          $file_size = $headers['content-length'][0] ?? null;
          $max_size  = $this->getMaxSize();
          if (\is_null($file_size) || !is_numeric($file_size) || (int) $file_size > $max_size) {
              Log::debug("Went to download remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
              return false;
          }

          // Download and return the file
          Log::debug("Downloading remote thumbnail from URL: {$url} in Embed->downloadThumbnail.");
          try {
              $content = HTTPClient::get($url)->getContent();
          } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
              Log::debug('Embed->downloadThumbnail@HTTPGet->getContent: ' . $e->getMessage());
              return null;
          }

          // The announced content-length is only a claim; check what we really got
          if (\strlen($content) > $max_size) {
              Log::debug("Downloaded remote thumbnail from URL: {$url} exceeds the upload limit of {$max_size} so we discarded it in Embed->downloadThumbnail.");
              return false;
          }
          return $content;
      }

      /**
       * Offer the title stored in this attachment's embed data as its best title.
       *
       * $title is only overwritten, and the event only stopped, when exactly one
       * embed exists for the attachment and it has a title.
       *
       * @param null|string $title filled in place with the embed's title
       */
      public function onAttachmentGetBestTitle(Attachment $attachment, Note $note, ?string &$title): bool
      {
          try {
              $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
          } catch (NotFoundException) {
              return Event::next;
          } catch (DuplicateFoundException $e) {
              Log::warning($e->getMessage());
              return Event::next;
          }

          $embed_title = $embed->getTitle();
          if (\is_null($embed_title)) {
              // Nothing better to offer; let other handlers have a go
              return Event::next;
          }
          $title = $embed_title;
          return Event::stop;
      }

      /**
       * Event raised when GNU social polls the plugin for information about it.
       * Adds this plugin's version information to $versions array
       *
       * @param array $versions inherited from parent
       *
       * @throws ServerException
       *
       * @return bool true hook value
       */
      public function onPluginVersion(array &$versions): bool
      {
          $versions[] = [
              'name'     => 'Embed',
              'version'  => $this->version(),
              'author'   => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
              'homepage' => GNUSOCIAL_PROJECT_URL,
              // TRANS: Plugin description.
              'description' => _m('Plugin for using and representing oEmbed, OpenGraph and other data.'),
          ];
          return Event::next;
      }
  }