123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414 |
- <?php
- declare(strict_types = 1);
- // {{{ License
- // This file is part of GNU social - https://www.gnu.org/software/social
- //
- // GNU social is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Affero General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // GNU social is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Affero General Public License for more details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
- // }}}
- /**
- * OEmbed and OpenGraph implementation for GNU social
- *
- * @package GNUsocial
- *
- * @author Mikael Nordfeldth
- * @author Stephen Paul Weber
- * @author hannes
- * @author Mikael Nordfeldth
- * @author Miguel Dantas
- * @author Hugo Sales <hugo@hsal.es>
- * @author Diogo Peralta Cordeiro <mail@diogo.site>
- * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
- * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
- */
- namespace Plugin\Embed;
- use App\Core\Cache;
- use App\Core\DB\DB;
- use App\Core\Event;
- use App\Core\GSFile;
- use App\Core\HTTPClient;
- use function App\Core\I18n\_m;
- use App\Core\Log;
- use App\Core\Modules\Plugin;
- use App\Core\Router\RouteLoader;
- use App\Core\Router\Router;
- use App\Entity\Note;
- use App\Util\Common;
- use App\Util\Exception\ClientException;
- use App\Util\Exception\DuplicateFoundException;
- use App\Util\Exception\NotFoundException;
- use App\Util\Exception\ServerException;
- use App\Util\Formatting;
- use App\Util\TemporaryFile;
- use Component\Attachment\Entity\Attachment;
- use Component\Link\Entity\Link;
- use Embed\Embed as LibEmbed;
- use Exception;
- use Symfony\Component\HttpFoundation\Request;
- use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
- use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
- use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
- use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;
- /**
- * Base class for the Embed plugin that does most of the heavy lifting to get
- * and display representations for remote content.
- *
- * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
- * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
- */
- class Embed extends Plugin
- {
- public function version(): string
- {
- return '3.0.1';
- }
- /**
- * Settings which can be set in social.local.yaml
- * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
- */
- public bool $check_whitelist = false;
- public bool $check_blacklist = false;
- public array $domain_whitelist = [
- // hostname
- '.*', // Default to allowing any host
- ];
- public array $domain_blacklist = [];
- // Whether to maintain a copy of the original media or only a thumbnail of it
- public bool $store_image = true;
- public ?int $thumbnail_width;
- public ?int $thumbnail_height;
- public ?int $max_size;
- public ?bool $smart_crop;
- private function getStoreImage(): bool
- {
- return $this->store_image;
- }
- private function getMaxSize(): int
- {
- return $this->max_size ?? Common::config('attachments', 'file_quota');
- }
- private function getSmartCrop(): bool
- {
- return $this->smart_crop ?? Common::config('thumbnail', 'smart_crop');
- }
- /**
- * This code executes when GNU social creates the page routing, and we hook
- * on this event to add our action handler for Embed.
- *
- * @param RouteLoader $m the router that was initialized
- *
- * @throws Exception
- */
- public function onAddRoute(RouteLoader $m): bool
- {
- $m->connect('oembed', 'main/oembed', Controller\OEmbed::class);
- return Event::next;
- }
- /**
- * Insert oembed and opengraph tags in all HTML head elements
- */
- public function onShowHeadElements(Request $request, array &$result): bool
- {
- $matches = [];
- preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);
- $url = match ($matches[1]) {
- 'attachment' => "{$matches[1]}/{$matches[2]}",
- default => null,
- };
- if (\is_null($url)) {
- foreach (['xml', 'json'] as $format) {
- $result[] = [
- 'link' => [
- 'rel' => 'alternate',
- 'type' => "application/{$format}+oembed",
- 'href' => Router::url('oembed', ['format' => $format, 'url' => $url]),
- 'title' => 'oEmbed',
- ], ];
- }
- }
- return Event::next;
- }
- /**
- * Show this attachment enhanced with the corresponding Embed data, if available
- */
- public function onViewLink(array $vars, array &$res): bool
- {
- $link = $vars['link'];
- try {
- $embed = Cache::get(
- 'attachment-embed-' . $link->getId(),
- fn () => DB::findOneBy('attachment_embed', ['link_id' => $link->getId()]),
- );
- } catch (DuplicateFoundException $e) {
- Log::warning($e->getMessage());
- return Event::next;
- } catch (NotFoundException) {
- Log::debug("Embed doesn't have a representation for the link id={$link->getId()}. Must have been stored before the plugin was enabled.");
- return Event::next;
- }
- $attributes = $embed->getImageHTMLAttributes();
- $res[] = Formatting::twigRenderFile(
- 'embed/embedView.html.twig',
- ['embed' => $embed, 'attributes' => $attributes, 'link' => $link],
- );
- return Event::stop;
- }
- /**
- * Save embedding information for an Attachment, if applicable.
- *
- * @throws DuplicateFoundException
- */
- public function onNewLinkFromNote(Link $link, Note $note): bool
- {
- // Only handle text mime
- $mimetype = $link->getMimetype();
- if (\is_null($mimetype) || !(Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml'))) {
- return Event::next;
- }
- // Ignore if already handled
- $attachment_embed = DB::find('attachment_embed', ['link_id' => $link->getId()]);
- if (!\is_null($attachment_embed)) {
- return Event::next;
- }
- // If an attachment already exist, do not create an Embed for it. Some other plugin must have done things
- $attachment_to_link = DB::find('attachment_to_link', ['link_id' => $link->getId()]);
- if (!\is_null($attachment_to_link)) {
- $attachment_id = $attachment_to_link->getAttachmentId();
- try {
- $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
- $attachment->livesIncrementAndGet();
- return Event::next;
- } catch (DuplicateFoundException|NotFoundException $e) {
- Log::error($e);
- }
- }
- // Create an Embed representation for this URL
- $embed_data = $this->getEmbedLibMetadata($link->getUrl());
- $embed_data['link_id'] = $link->getId();
- $img_data = $this->downloadThumbnail($embed_data['thumbnail_url']);
- switch ($img_data) {
- case null: // URL isn't usable
- $embed_data['thumbnail_url'] = null;
- // no break
- case false: // Thumbnail isn't acceptable
- DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
- Event::handle('AttachmentStoreNew', [&$attachment]);
- break;
- default: // String is valid image data
- $temp_file = new TemporaryFile();
- $temp_file->write($img_data);
- try {
- $attachment = GSFile::storeFileAsAttachment($temp_file);
- $embed_data['attachment_id'] = $attachment->getId();
- } catch (ClientException) {
- DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
- Event::handle('AttachmentStoreNew', [&$attachment]);
- }
- }
- $embed_data['attachment_id'] = $attachment->getId();
- DB::persist(Entity\AttachmentEmbed::create($embed_data));
- DB::flush();
- return Event::stop;
- }
- /**
- * @return bool true if allowed by the lists, false otherwise
- */
- private function allowedLink(string $url): bool
- {
- $passed_whitelist = !$this->check_whitelist;
- $passed_blacklist = !$this->check_blacklist;
- if ($this->check_whitelist) {
- $passed_whitelist = false; // don't trust be default
- $host = parse_url($url, \PHP_URL_HOST);
- foreach ($this->domain_whitelist as $regex => $provider) {
- if (preg_match("/{$regex}/", $host)) {
- $passed_whitelist = true; // we trust this source
- }
- }
- }
- if ($this->check_blacklist) {
- // assume it passed by default
- $host = parse_url($url, \PHP_URL_HOST);
- foreach ($this->domain_blacklist as $regex => $provider) {
- if (preg_match("/{$regex}/", $host)) {
- $passed_blacklist = false; // we blocked this source
- }
- }
- }
- return $passed_whitelist && $passed_blacklist;
- }
- /**
- * Perform an oEmbed or OpenGraph lookup for the given $url.
- *
- * Some known hosts are whitelisted with API endpoints where we
- * know they exist but autodiscovery data isn't available.
- *
- * Throws exceptions on failure.
- */
- private function getEmbedLibMetadata(string $url): array
- {
- Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
- $embed = new LibEmbed();
- $info = $embed->get($url);
- $metadata['title'] = $info->title;
- $metadata['description'] = $info->description;
- $metadata['author_name'] = $info->authorName;
- $root_url = parse_url($url);
- $root_url = "{$root_url['scheme']}://{$root_url['host']}";
- $metadata['author_url'] = $info->authorUrl ? (string) $info->authorUrl : $root_url;
- $metadata['provider_name'] = $info->providerName;
- $metadata['provider_url'] = (string) $info->providerUrl ?? $metadata['author_name'];
- if (!\is_null($info->image)) {
- $thumbnail_url = (string) $info->image;
- } else {
- $thumbnail_url = (string) $info->favicon;
- }
- // Check thumbnail URL validity
- $metadata['thumbnail_url'] = $thumbnail_url;
- return self::normalizeEmbedLibMetadata($metadata);
- }
- /**
- * Normalize fetched info.
- */
- private static function normalizeEmbedLibMetadata(array $metadata): array
- {
- if (isset($metadata['thumbnail_url'])) {
- // sometimes sites serve the path, not the full URL, for images
- // let's "be liberal in what you accept from others"!
- // add protocol and host if the thumbnail_url starts with /
- if ($metadata['thumbnail_url'][0] == '/') {
- $metadata['thumbnail_url'] = "{$metadata['provider_url']}{$metadata['thumbnail_url']}";
- }
- // Some wordpress opengraph implementations sometimes return a white blank image
- // no need for us to save that!
- if ($metadata['thumbnail_url'] == 'https://s0.wp.com/i/blank.jpg') {
- $metadata['thumbnail_url'] = null;
- }
- }
- return $metadata;
- }
- /**
- * Private helper that:
- * - checks if given URL is valid and is in fact an image (basic test), returns null if not;
- * - checks if respects file quota and whitelist/blacklist, returns false if not;
- * - downloads the thumbnail, returns a string if successful.
- *
- * @param string $url URL to the remote thumbnail
- */
- private function downloadThumbnail(string $url): bool|string|null
- {
- // Is this a valid URL?
- if (!Common::isValidHttpUrl($url)) {
- Log::debug("Invalid URL ({$url}) in Embed->downloadThumbnail.");
- return null;
- }
- // Is this URL trusted?
- if (!$this->allowedLink($url)) {
- Log::info("Blocked URL ({$url}) in Embed->downloadThumbnail.");
- return false;
- }
- // Validate if the URL really does point to a remote image
- $head = HTTPClient::head($url);
- try {
- $headers = $head->getHeaders();
- } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
- Log::debug('Embed->downloadThumbnail@HTTPHead->getHeaders: ' . $e->getMessage());
- return null;
- }
- $headers = array_change_key_case($headers, \CASE_LOWER);
- if (empty($headers['content-type']) || GSFile::mimetypeMajor($headers['content-type'][0]) !== 'image') {
- Log::debug("URL ({$url}) doesn't point to an image (content-type: " . (!empty($headers['content-type'][0]) ? $headers['content-type'][0] : 'not available') . ') in Embed->downloadThumbnail.');
- return null;
- }
- // Does it respect the file quota?
- $file_size = $headers['content-length'][0] ?? null;
- $max_size = Common::config('attachments', 'file_quota');
- if (\is_null($file_size) || $file_size > $max_size) {
- Log::debug("Went to download remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
- return false;
- }
- // Download and return the file
- Log::debug("Downloading remote thumbnail from URL: {$url} in Embed->downloadThumbnail.");
- return HTTPClient::get($url)->getContent();
- }
- public function onAttachmentGetBestTitle(Attachment $attachment, Note $note, ?string &$title)
- {
- try {
- $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
- $title = $embed->getTitle();
- return Event::stop;
- } catch (NotFoundException) {
- }
- return Event::next;
- }
- /**
- * Event raised when GNU social polls the plugin for information about it.
- * Adds this plugin's version information to $versions array
- *
- * @param array $versions inherited from parent
- *
- * @throws ServerException
- *
- * @return bool true hook value
- */
- public function onPluginVersion(array &$versions): bool
- {
- $versions[] = [
- 'name' => 'Embed',
- 'version' => $this->version(),
- 'author' => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
- 'homepage' => GNUSOCIAL_PROJECT_URL,
- 'description', // TRANS: Plugin description. => _m('Plugin for using and representing oEmbed, OpenGraph and other data.'),
- ];
- return Event::next;
- }
- }
|