Embed.php 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. <?php
  2. // {{{ License
  3. // This file is part of GNU social - https://www.gnu.org/software/social
  4. //
  5. // GNU social is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU Affero General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // GNU social is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU Affero General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  17. // }}}
  18. /**
  19. * OEmbed and OpenGraph implementation for GNU social
  20. *
  21. * @package GNUsocial
  22. *
  23. * @author Mikael Nordfeldth
  24. * @author Stephen Paul Weber
  25. * @author hannes
  26. * @author Mikael Nordfeldth
  27. * @author Miguel Dantas
  28. * @author Hugo Sales <hugo@hsal.es>
  29. * @author Diogo Peralta Cordeiro <mail@diogo.site>
  30. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  31. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  32. */
  33. namespace Plugin\Embed;
  34. use App\Core\Cache;
  35. use App\Core\DB\DB;
  36. use App\Core\Event;
  37. use App\Core\GSFile;
  38. use App\Core\HTTPClient;
  39. use function App\Core\I18n\_m;
  40. use App\Core\Log;
  41. use App\Core\Modules\Plugin;
  42. use App\Core\Router\RouteLoader;
  43. use App\Core\Router\Router;
  44. use App\Entity\Attachment;
  45. use App\Entity\Link;
  46. use App\Entity\Note;
  47. use App\Util\Common;
  48. use App\Util\Exception\ClientException;
  49. use App\Util\Exception\DuplicateFoundException;
  50. use App\Util\Exception\NotFoundException;
  51. use App\Util\Exception\ServerException;
  52. use App\Util\Formatting;
  53. use App\Util\TemporaryFile;
  54. use Embed\Embed as LibEmbed;
  55. use Exception;
  56. use Symfony\Component\HttpFoundation\Request;
  57. /**
  58. * Base class for the Embed plugin that does most of the heavy lifting to get
  59. * and display representations for remote content.
  60. *
  61. * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
  62. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  63. */
  64. class Embed extends Plugin
  65. {
  /**
   * Report the version string of this plugin.
   *
   * @return string the plugin version
   */
  public function version(): string
  {
      $plugin_version = '3.0.1';

      return $plugin_version;
  }
  /**
   * Settings which can be set in social.local.yaml
   * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
   */
  // When true, allowedLink() only accepts hosts matching an entry of $domain_whitelist
  public bool $check_whitelist = false;
  // When true, allowedLink() rejects hosts matching an entry of $domain_blacklist
  public bool $check_blacklist = false;
  // Host patterns considered trusted; matched against the URL host in allowedLink()
  public array $domain_whitelist = [
      // hostname
      '.*', // Default to allowing any host
  ];
  // Host patterns considered blocked; matched against the URL host in allowedLink()
  public array $domain_blacklist = [];
  // Whether to maintain a copy of the original media or only a thumbnail of it
  public bool $store_image = true;
  // NOTE(review): thumbnail dimensions are not read anywhere in this class — presumably consumed elsewhere, confirm
  public ?int $thumbnail_width;
  public ?int $thumbnail_height;
  // Size limit override; getMaxSize() falls back to the 'attachments' / 'file_quota' config when unset
  public ?int $max_size;
  // Smart crop override; getSmartCrop() falls back to the 'thumbnail' / 'smart_crop' config when unset
  public ?bool $smart_crop;
  /**
   * Tell whether the `store_image` setting is enabled.
   *
   * @return bool current value of the `store_image` setting
   */
  private function getStoreImage(): bool
  {
      $keep_original = $this->store_image;

      return $keep_original;
  }
  /**
   * Resolve the size limit used by this plugin.
   *
   * @return int the `max_size` setting when it is set and non-null,
   *             otherwise the 'attachments' / 'file_quota' configuration value
   */
  private function getMaxSize(): int
  {
      // isset() is false both for null and for a typed property that was never initialised
      if (isset($this->max_size)) {
          return $this->max_size;
      }

      return Common::config('attachments', 'file_quota');
  }
  /**
   * Resolve whether smart cropping is requested.
   *
   * @return bool the `smart_crop` setting when it is set and non-null,
   *              otherwise the 'thumbnail' / 'smart_crop' configuration value
   */
  private function getSmartCrop(): bool
  {
      // isset() is false both for null and for a typed property that was never initialised
      if (isset($this->smart_crop)) {
          return $this->smart_crop;
      }

      return Common::config('thumbnail', 'smart_crop');
  }
  /**
   * Append common favicon, embed and social media image sizes to the list
   * of allowed thumbnail sizes. Ancient sizes are kept as commented-out rows.
   *
   * TODO: This is a temporary "solution" until we handle different sizes properly, discuss with Eliseu
   *
   * @param array $sizes list being built; every appended entry has the shape
   *                     ['width' => int, 'height' => int]
   *
   * @return bool always Event::next
   */
  public function onGetAllowedThumbnailSizes(array &$sizes): bool
  {
      // Each row is [width, height]; rows are appended in exactly this order
      $embed_sizes = [
          [16, 16],       // Standard favicon size for browsers
          [24, 24],       // IE9 pinned site size for user interface
          [32, 32],       // Standard for most desktop browsers, facebook small photo thumbnail
          [36, 36],       // Facebook big photo thumbnail
          // [48, 48],    // Windows site (mid-size standard favicon)
          [55, 55],       // Pinterest small thumb
          // [57, 57],    // Standard iOS home screen (iPod Touch, iPhone first generation to 3G)
          // [60, 60],    // iPhone touch up to iOS 7
          // [64, 64],    // Windows site, Safari Reader List sidebar in HiDPI/Retina
          // [70, 70],    // Win 8.1 Metro tile
          // [72, 72],    // iPad touch up to iOS 6
          // [76, 76],    // iPad home screen icon up to iOS 7
          // [96, 96],    // GoogleTV icon
          [110, 110],     // Instagram profile picture
          // [114, 114],  // iPhone retina touch up to iOS6
          [116, 116],     // Facebook page small square shared link
          // [120, 120],  // iPhone retina touch up to iOS6
          [128, 128],     // Chrome Web Store icon and Small Windows 8 Star Screen Icon, Facebook profile picture smartphone
          // [144, 144],  // IE10 Metro tile for pinned site, Apple iPad retina iOS 6 to iOS 7
          [150, 150],     // Win 8.1 Metro tile (standard MS tile)
          [154, 154],     // Facebook feed small square shared link
          [152, 152],     // Apple iPad iOS 10 retina touch icon
          [161, 161],     // Instagram thumbnails
          [165, 165],     // Pinterest profile picture
          [167, 167],     // Apple iPad Retina touch icon
          [170, 170],     // Facebook Profile Picture desktop
          [180, 180],     // Apple iPhone Retina touch icon, Facebook Profile Picture
          [192, 192],     // Google Developer Web App Manifest Recommendation
          // [195, 195],  // Opera Speed Dial icon
          [196, 196],     // Chrome for Android home screen icon
          [200, 200],     // GitHub profile photo
          [222, 150],     // Pinterest large thumb
          // [228, 228],  // Opera Coast icon
          [250, 250],     // Google My Business minimum
          // [260, 260],
          [300, 300],     // Instagram personal profile image
          // [310, 150],  // Win 8.1 wide Metro tile
          // [310, 310],  // Win 8.1 big Metro tile
          [360, 360],
          [400, 150],     // Facebook small cover photo, facebook small fundraiser image
          // [400, 300],
          [400, 400],     // Twitter profile photo
          [470, 174],     // Facebook event small image
          [470, 246],     // Facebook feed small rectangular link
          [480, 360],     // YouTube video thumbnail
          [484, 252],     // Facebook page small rectangular link
          [510, 510],     // Instagram feed image
          [512, 512],     // Android Chrome big
          // [560, 315],
          // [560, 340],
          [600, 900],     // Pinterest expanded pin
          [612, 612],     // Instagram small photo
          // [640, 385],
          [700, 800],     // Twitter two image post
          [720, 720],     // Google My Business
          [800, 300],     // Facebook fundraiser image
          [800, 800],     // Youtube channel profile image
          [820, 312],     // Facebook cover photo
          // [853, 505],
          [900, 600],     // Instagram company photo
          [943, 943],     // Big safari pinned tab
          [1024, 768],    // Standard 4:3 ratio photo
          [1080, 720],    // Standard 3:2 ratio photo
          [1080, 1080],   // Standard 1:1 ratio photo, Instagram photo
          [1080, 1350],   // Instagram portrait photo
          [1080, 1920],   // Instagram story
          [1128, 191],    // Instagram company cover image
          [1128, 376],    // Instagram Main image
          [1200, 600],    // Twitter four images
          [1200, 627],    // Instagram shared link
          [1200, 628],    // Facebook and Twitter shared link
          [1200, 630],    // Facebook shared image
          [1200, 686],    // Twitter three images
          [1200, 675],    // Twitter shared image
          [1280, 720],    // Standard small 16:9 ratio photo, YouTube HD
          [1500, 1500],   // Twitter header photo
          [1584, 396],    // Instagram personal background image
          [1920, 1005],   // Facebook event image
          [1920, 1080],   // Standard big 16:9 ratio photo
          [2048, 1152],   // YouTube channel cover photo
      ];

      foreach ($embed_sizes as [$width, $height]) {
          $sizes[] = ['width' => $width, 'height' => $height];
      }

      return Event::next;
  }
  /**
   * Hook run while GNU social builds its routing table: registers the
   * Embed controller under both the 'oembed' and the 'embed' route ids.
   *
   * @param RouteLoader $m the route loader the routes are connected to
   *
   * @throws Exception
   *
   * @return bool always Event::next
   */
  public function onAddRoute(RouteLoader $m): bool
  {
      // route id => path, connected in this order
      $embed_routes = [
          'oembed' => 'main/oembed',
          'embed'  => 'main/embed',
      ];

      foreach ($embed_routes as $route_id => $route_path) {
          $m->connect($route_id, $route_path, Controller\Embed::class);
      }

      return Event::next;
  }
  /**
   * Insert oEmbed discovery <link> tags in the HTML head of pages that have
   * an embeddable representation (currently only paths under 'attachment').
   *
   * One 'alternate' link is appended per supported format (xml and json),
   * each pointing at the 'embed' route with the page path as `url`.
   *
   * @param Request $request the current request; only its path info is read
   * @param array   $result  list of head elements being built
   *
   * @return bool always Event::next
   */
  public function onShowHeadElements(Request $request, array &$result): bool
  {
      $matches = [];
      // Split the path in "first segment" / "rest"; paths such as "/" have no
      // first segment at all, in which case there is nothing to advertise
      if (preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches) !== 1) {
          return Event::next;
      }

      $url = match ($matches[1]) {
          'attachment' => "{$matches[1]}/{$matches[2]}",
          default      => null,
      };

      // Only advertise oEmbed alternates when we actually have a URL to embed
      if (!is_null($url)) {
          foreach (['xml', 'json'] as $format) {
              $result[] = [
                  'link' => [
                      'rel'   => 'alternate',
                      'type'  => "application/{$format}+oembed",
                      'href'  => Router::url('embed', ['format' => $format, 'url' => $url]),
                      'title' => 'oEmbed',
                  ],
              ];
          }
      }

      return Event::next;
  }
  /**
   * Render a link together with its stored Embed data, when such data exists.
   *
   * The embed row is looked up through the cache (falling back to the
   * database). When it is found, the rendered 'embed/embedView.html.twig'
   * template is appended to $res and the event is stopped; otherwise the
   * problem is logged and the event continues untouched.
   *
   * @param array $vars must contain the link under the 'link' key
   * @param array $res  list of rendered views being built
   *
   * @return bool Event::stop when a view was appended, Event::next otherwise
   */
  public function onViewLink(array $vars, array &$res): bool
  {
      $link = $vars['link'];

      try {
          $embed = Cache::get(
              "attachment-embed-{$link->getId()}",
              function () use ($link) {
                  return DB::findOneBy('attachment_embed', ['link_id' => $link->getId()]);
              }
          );
      } catch (DuplicateFoundException $e) {
          Log::warning($e);
          return Event::next;
      } catch (NotFoundException) {
          Log::debug("Embed doesn't have a representation for the link id={$link->getId()}. Must have been stored before the plugin was enabled.");
          return Event::next;
      }

      $template_vars = [
          'embed'      => $embed,
          'attributes' => $embed->getImageHTMLAttributes(),
          'link'       => $link,
      ];
      $res[] = Formatting::twigRenderFile('embed/embedView.html.twig', $template_vars);

      return Event::stop;
  }
  /**
   * Save embedding information for a Link found in a Note, if applicable.
   *
   * Only (X)HTML links are handled. Links that already have an embed row or
   * an attachment are left alone. Otherwise the page metadata is fetched,
   * its thumbnail is downloaded when possible, and an AttachmentEmbed row
   * (plus the Attachment it points to) is persisted.
   *
   * @param Link $link the link that was just created
   * @param Note $note the note the link was found in (not used here)
   *
   * @throws DuplicateFoundException presumably raised by the DB lookups — TODO confirm
   *
   * @return bool Event::stop when an embed was stored, Event::next otherwise
   */
  public function onNewLinkFromNote(Link $link, Note $note): bool
  {
      // Only handle text mime
      $mimetype = $link->getMimetype();
      if (!(Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml'))) {
          return Event::next;
      }

      // Ignore if already handled
      $attachment_embed = DB::find('attachment_embed', ['link_id' => $link->getId()]);
      if (!is_null($attachment_embed)) {
          return Event::next;
      }

      // If an attachment already exist, do not create an Embed for it. Some other plugin must have done things
      $attachment_to_link = DB::find('attachment_to_link', ['link_id' => $link->getId()]);
      if (!is_null($attachment_to_link)) {
          $attachment_id = $attachment_to_link->getAttachmentId();
          try {
              $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
              $attachment->livesIncrementAndGet();
              return Event::next;
          } catch (DuplicateFoundException | NotFoundException $e) {
              // The relation points to a missing/ambiguous attachment: log and build an Embed anyway
              Log::error($e);
          }
      }

      // Create an Embed representation for this URL
      $embed_data            = $this->getEmbedLibMetadata($link->getUrl());
      $embed_data['link_id'] = $link->getId();

      // The metadata may carry no thumbnail at all (null or empty string);
      // downloadThumbnail() only accepts strings, so treat that as "URL isn't usable"
      $thumbnail_url = $embed_data['thumbnail_url'] ?? null;
      $img_data      = (is_string($thumbnail_url) && $thumbnail_url !== '')
          ? $this->downloadThumbnail($thumbnail_url)
          : null;

      // Strict checks on purpose: null (unusable URL) and false (thumbnail not
      // acceptable) must not be confused. Only an unusable URL, or one that
      // served no data, is forgotten; a rejected thumbnail keeps its URL.
      if ($img_data === null || $img_data === '') {
          $embed_data['thumbnail_url'] = null;
      }

      $attachment = null;
      if (is_string($img_data) && $img_data !== '') {
          // String is valid image data
          $temp_file = new TemporaryFile();
          $temp_file->write($img_data);
          try {
              $attachment = GSFile::sanitizeAndStoreFileAsAttachment($temp_file);
          } catch (ClientException) {
              // The downloaded file was refused; fall back to a file-less attachment below
          }
      }

      if (is_null($attachment)) {
          // No stored thumbnail: create an attachment that only records the mimetype of the link
          DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
          Event::handle('AttachmentStoreNew', [&$attachment]);
      }

      $embed_data['attachment_id'] = $attachment->getId();
      DB::persist(Entity\AttachmentEmbed::create($embed_data));
      DB::flush();

      return Event::stop;
  }
  /**
   * Check a URL's host against the configured white and black lists.
   *
   * A list is only consulted when its `check_*` setting is enabled. With the
   * whitelist enabled the host must match at least one of its patterns; with
   * the blacklist enabled the host must match none of its patterns.
   *
   * @param string $url
   *
   * @return bool true if allowed by the lists, false otherwise
   */
  private function allowedLink(string $url): bool
  {
      if (!$this->check_whitelist && !$this->check_blacklist) {
          return true;
      }

      // parse_url() yields null/false when there is no host; match against '' in that case
      $host = (string) parse_url($url, PHP_URL_HOST);

      // don't trust by default when the whitelist is enforced
      $passed_whitelist = !$this->check_whitelist
          || self::hostMatchesAny($host, $this->domain_whitelist);
      // assume it passed by default when the blacklist is enforced
      $passed_blacklist = !$this->check_blacklist
          || !self::hostMatchesAny($host, $this->domain_blacklist);

      return $passed_whitelist && $passed_blacklist;
  }

  /**
   * Tell whether $host matches at least one pattern of a domain list.
   *
   * Both list shapes are accepted: a plain list of patterns (['.*', ...],
   * the shape of the default settings) and a `pattern => provider name` map.
   *
   * @param string $host     hostname to test
   * @param array  $patterns regexps without delimiters (slashes are added here)
   *
   * @return bool true on the first matching pattern, false if none matches
   */
  private static function hostMatchesAny(string $host, array $patterns): bool
  {
      foreach ($patterns as $key => $value) {
          // In a plain list the key is just a numeric index, the pattern is the value
          $regex = is_string($key) ? $key : (string) $value;
          if (preg_match("/{$regex}/", $host) === 1) {
              return true;
          }
      }

      return false;
  }
  /**
   * Look up embed metadata for the given $url with the 'oscarotero/Embed'
   * library and return it as an array.
   *
   * The returned array has the keys title, description, author_name,
   * author_url, provider_name, provider_url and thumbnail_url, and has been
   * passed through normalizeEmbedLibMetadata(). The provider URL falls back
   * to the scheme and host of $url; the thumbnail falls back to the favicon
   * when the page exposes no image.
   *
   * Nothing is caught here, so whatever the library throws reaches the caller.
   *
   * @param string $url
   *
   * @return array
   */
  private function getEmbedLibMetadata(string $url): array
  {
      Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");

      $info = (new LibEmbed())->get($url);

      $metadata = [
          'title'         => $info->title,
          'description'   => $info->description,
          'author_name'   => $info->authorName,
          'author_url'    => (string) $info->authorUrl,
          'provider_name' => $info->providerName,
      ];

      // Provider URL, defaulting to the root of the looked up URL
      $url_parts    = parse_url($url);
      $site_root    = "{$url_parts['scheme']}://{$url_parts['host']}";
      $provider_url = $info->providerUrl;
      if ($provider_url != '') {
          $metadata['provider_url'] = (string) $provider_url;
      } else {
          $metadata['provider_url'] = (string) $site_root;
      }

      // Prefer the page image, use the favicon otherwise
      $image                     = $info->image;
      $thumbnail_source          = is_null($image) ? $info->favicon : $image;
      $metadata['thumbnail_url'] = (string) $thumbnail_source;

      return self::normalizeEmbedLibMetadata($metadata);
  }
  /**
   * Normalize fetched info.
   *
   * Only 'thumbnail_url' is touched:
   * - a protocol-relative URL ("//host/path") gets the scheme of 'provider_url'
   *   (https when that cannot be determined);
   * - a root-relative path ("/path") is resolved against the scheme, host and
   *   port of 'provider_url';
   * - the WordPress blank placeholder image is replaced by null.
   * A missing, null or empty 'thumbnail_url' is left exactly as it is.
   *
   * @param array $metadata metadata as built by getEmbedLibMetadata()
   *
   * @return array the same metadata with a normalized 'thumbnail_url'
   */
  private static function normalizeEmbedLibMetadata(array $metadata): array
  {
      $thumbnail_url = $metadata['thumbnail_url'] ?? null;
      if (!is_string($thumbnail_url) || $thumbnail_url === '') {
          return $metadata;
      }

      // sometimes sites serve the path, not the full URL, for images
      // let's "be liberal in what you accept from others"!
      $provider_url = (string) ($metadata['provider_url'] ?? '');
      $provider     = parse_url($provider_url);
      if (!is_array($provider)) {
          $provider = [];
      }
      $scheme = $provider['scheme'] ?? 'https';

      if (str_starts_with($thumbnail_url, '//')) {
          // protocol-relative: the host is already there, only the scheme is missing
          $thumbnail_url = "{$scheme}:{$thumbnail_url}";
      } elseif (str_starts_with($thumbnail_url, '/')) {
          if (isset($provider['host'])) {
              // resolve against the site root, ignoring any path or trailing slash of the provider URL
              $port          = isset($provider['port']) ? ":{$provider['port']}" : '';
              $thumbnail_url = "{$scheme}://{$provider['host']}{$port}{$thumbnail_url}";
          } else {
              $thumbnail_url = rtrim($provider_url, '/') . $thumbnail_url;
          }
      }

      // Some wordpress opengraph implementations sometimes return a white blank image
      // no need for us to save that!
      if ($thumbnail_url === 'https://s0.wp.com/i/blank.jpg') {
          $thumbnail_url = null;
      }

      $metadata['thumbnail_url'] = $thumbnail_url;

      return $metadata;
  }
  /**
   * Private helper that:
   * - checks if given URL is valid and is in fact an image (basic test), returns null if not;
   * - checks if respects the size limit and whitelist/blacklist, returns false if not;
   * - downloads the thumbnail, returns a string if successful.
   *
   * The size limit is the one given by getMaxSize(), i.e. the plugin's
   * `max_size` setting with the global file quota as fallback. It is checked
   * against the announced content-length when the server sends one, and
   * always against the size of the data actually received.
   *
   * @param string $url URL to the remote thumbnail
   *
   * @return null|bool|string image data, false when not acceptable, null when the URL isn't usable
   */
  private function downloadThumbnail(string $url): bool|string|null
  {
      // Is this a valid URL?
      if (!Common::isValidHttpUrl($url)) {
          Log::debug("Invalid URL ({$url}) in Embed->downloadThumbnail.");
          return null;
      }

      // Is this URL trusted?
      if (!$this->allowedLink($url)) {
          Log::info("Blocked URL ({$url}) in Embed->downloadThumbnail.");
          return false;
      }

      // Validate if the URL really does point to a remote image
      $head         = HTTPClient::head($url);
      $headers      = array_change_key_case($head->getHeaders(), CASE_LOWER);
      $content_type = $headers['content-type'][0] ?? null;
      if (empty($content_type) || GSFile::mimetypeMajor($content_type) !== 'image') {
          Log::debug("URL ({$url}) doesn't point to an image (content-type: " . (!empty($content_type) ? $content_type : 'not available') . ') in Embed->downloadThumbnail.');
          return null;
      }

      // Does it respect the file quota? Servers are free to omit content-length,
      // so this first check only applies when a size was announced
      $max_size      = $this->getMaxSize();
      $declared_size = $headers['content-length'][0] ?? null;
      if (!is_null($declared_size) && (int) $declared_size > $max_size) {
          Log::debug("Went to download remote thumbnail of size {$declared_size} but the upload limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
          return false;
      }

      // Download the file
      Log::debug("Downloading remote thumbnail from URL: {$url} in Embed->downloadThumbnail.");
      $content = HTTPClient::get($url)->getContent();

      // The announced size may have been missing or wrong: check what we really got
      $actual_size = strlen($content);
      if ($actual_size > $max_size) {
          Log::debug("Downloaded remote thumbnail of size {$actual_size} but the upload limit is {$max_size} so we discarded it in Embed->downloadThumbnail.");
          return false;
      }

      return $content;
  }
  /**
   * Hook run when GNU social collects information about its plugins:
   * appends the description of this plugin (name, version, authors,
   * homepage and translated description) to $versions.
   *
   * @param array $versions list of plugin descriptions being built
   *
   * @throws ServerException
   *
   * @return bool always Event::next
   */
  public function onPluginVersion(array &$versions): bool
  {
      // TRANS: Plugin description.
      $description = _m('Plugin for using and representing oEmbed, OpenGraph and other data.');

      $plugin_info = [
          'name'        => 'Embed',
          'version'     => $this->version(),
          'author'      => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
          'homepage'    => GNUSOCIAL_PROJECT_URL,
          'description' => $description,
      ];
      $versions[] = $plugin_info;

      return Event::next;
  }
  473. }