123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
- <?php
- // {{{ License
- // This file is part of GNU social - https://www.gnu.org/software/social
- //
- // GNU social is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Affero General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // GNU social is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Affero General Public License for more details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
- // }}}
- /**
- * OEmbed and OpenGraph implementation for GNU social
- *
- * @package GNUsocial
- *
- * @author Mikael Nordfeldth
- * @author Stephen Paul Weber
- * @author hannes
- * @author Mikael Nordfeldth
- * @author Miguel Dantas
- * @author Diogo Peralta Cordeiro <mail@diogo.site>
- * @authir Hugo Sales <hugo@hsal.es>
- *
- * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
- * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
- */
- namespace Plugin\Embed;
- use App\Core\Cache;
- use App\Core\DB\DB;
- use App\Core\Event;
- use App\Core\GSFile;
- use App\Core\HTTPClient;
- use App\Core\Log;
- use App\Core\Modules\Plugin;
- use App\Core\Router\RouteLoader;
- use App\Core\Router\Router;
- use App\Entity\Attachment;
- use App\Entity\AttachmentThumbnail;
- use App\Util\Common;
- use App\Util\Exception\DuplicateFoundException;
- use App\Util\Exception\NotFoundException;
- use App\Util\Formatting;
- use App\Util\TemporaryFile;
- use Embed\Embed as LibEmbed;
- use Exception;
- use Symfony\Component\HttpFoundation\Request;
- /**
- * Base class for the Embed plugin that does most of the heavy lifting to get
- * and display representations for remote content.
- *
- * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
- * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
- */
- class Embed extends Plugin
- {
- /**
- * Settings which can be set in social.local.yaml
- * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
- */
- public $domain_allowlist = [
- // hostname => service provider
- '.*' => '', // Default to allowing any host
- ];
/**
 * Register the routes served by the Embed plugin.
 *
 * Two route ids, `oembed` and `embed`, are connected; both are handled by
 * the same controller class and differ only in the path they answer on.
 *
 * @param RouteLoader $m the route loader the routes are registered with
 *
 * @return bool always Event::next
 */
public function onAddRoute(RouteLoader $m): bool
{
    $routes = [
        'oembed' => 'main/oembed',
        'embed'  => 'main/embed',
    ];

    foreach ($routes as $id => $path) {
        $m->connect($id, $path, Controller\Embed::class);
    }

    return Event::next;
}
/**
 * Insert oEmbed discovery links in the HTML head of attachment pages.
 *
 * When the first segment of the request path is `attachment`, one `link`
 * entry per oEmbed format (xml and json) is appended to $result. Any other
 * path, including one with no segment at all such as `/`, leaves $result
 * untouched.
 *
 * @param Request $request the current request, only its path info is read
 * @param array   $result  list of head elements, appended to in place
 *
 * @return bool always Event::next
 */
public function onShowHeadElements(Request $request, array &$result)
{
    $matches = [];
    // Split the path into its first segment and the remainder. A path without
    // any non-slash character does not match, in which case there is nothing
    // to inspect (and no $matches[1] to read).
    if (preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches) !== 1) {
        return Event::next;
    }

    if ($matches[1] !== 'attachment') {
        return Event::next;
    }

    $url = "{$matches[1]}/{$matches[2]}";
    foreach (['xml', 'json'] as $format) {
        $result[] = [
            'link' => [
                'rel'   => 'alternate',
                'type'  => "application/{$format}+oembed",
                'href'  => Router::url('embed', ['format' => $format, 'url' => $url]),
                'title' => 'oEmbed',
            ],
        ];
    }

    return Event::next;
}
/**
 * Store embed metadata for a newly stored Attachment, when applicable.
 *
 * A lookup is only attempted when no `attachment_embed` row exists yet for
 * the attachment, and the attachment has both a remote URL and a mimetype
 * starting with `text/html` or `application/xhtml+xml`. Anything thrown while
 * fetching or persisting the embed data is logged as a warning.
 *
 * @param Attachment $attachment the newly stored attachment
 *
 * @return bool always Event::next
 */
public function onAttachmentStoreNew(Attachment $attachment): bool
{
    try {
        DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);

        // A row is already present, so there is nothing to add
        return Event::next;
    } catch (NotFoundException) {
        // Expected for a new attachment: carry on below and try to build the embed
    } catch (DuplicateFoundException) {
        Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");

        return Event::next;
    }

    if (!$attachment->hasRemoteUrl() || !$attachment->hasMimetype()) {
        return Event::next;
    }

    $mime    = $attachment->getMimetype();
    $is_html = Formatting::startsWith($mime, 'text/html')
        || Formatting::startsWith($mime, 'application/xhtml+xml');
    if (!$is_html) {
        return Event::next;
    }

    try {
        $row                  = $this->getEmbed($attachment->getRemoteUrl(), $attachment);
        $row['attachment_id'] = $attachment->getId();
        DB::persist(Entity\AttachmentEmbed::create($row));
        DB::flush();
    } catch (Exception $failure) {
        Log::warning($failure);
    }

    return Event::next;
}
/**
 * Replace the enclosure representation of an attachment with its embed data.
 *
 * When no `attachment_embed` row exists for the attachment, $enclosure is
 * left alone and Event::next is returned. Otherwise $enclosure is overwritten
 * (with null if the embed is not an image) and Event::stop is returned.
 *
 * @param int        $attachment_id id of the attachment being described
 * @param null|array $enclosure     enclosure description, replaced in place
 *
 * @return bool Event::next when there is no embed row, Event::stop otherwise
 */
public function onAttachmentFileInfo(int $attachment_id, ?array &$enclosure)
{
    try {
        $row = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment_id]);
    } catch (NotFoundException) {
        return Event::next;
    }

    // An embed row exists, so this plugin answers for the attachment either
    // way; without an image there is simply no enclosure to offer.
    $enclosure = !$row->isImage()
        ? null
        : [
            'filepath' => $row->getFilepath(),
            'mimetype' => $row->getMimetype(),
            'title'    => $row->getTitle(),
            'width'    => $row->getWidth(),
            'height'   => $row->getHeight(),
            'url'      => $row->getMediaUrl(),
        ];

    return Event::stop;
}
/**
 * Show this attachment enhanced with the corresponding Embed data, if available.
 *
 * The embed row is looked up through the cache (key `attachment-embed-<id>`).
 * When it is missing, duplicated, null, or carries neither an author name nor
 * a provider, nothing is rendered and Event::next is returned. Otherwise the
 * rendered HTML is appended to $res and Event::stop is returned.
 *
 * @param Attachment $attachment the attachment being displayed
 * @param array      $res        list of rendered representations, appended to in place
 *
 * @return bool Event::stop when a representation was rendered, Event::next otherwise
 */
public function onShowAttachment(Attachment $attachment, array &$res)
{
    try {
        $embed = Cache::get(
            'attachment-embed-' . $attachment->getId(),
            fn () => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]),
        );
    } catch (DuplicateFoundException $e) {
        Log::warning($e);
        return Event::next;
    } catch (NotFoundException) {
        return Event::next;
    }

    // Either there is no embed at all, or it has nothing worth showing.
    // The null test must short-circuit before any method is called on $embed.
    if (is_null($embed) || (empty($embed->getAuthorName()) && empty($embed->getProvider()))) {
        Log::debug('Embed doesn\'t have a representation for the attachment #' . $attachment->getId());
        return Event::next;
    }

    $attributes = $embed->getImageHTMLAttributes(['class' => 'u-photo embed']);

    // The template lines share the closing marker's indentation, which PHP
    // strips, so the rendered string carries no leading whitespace.
    $res[] = Formatting::twigRenderString(<<<END
        <article class="h-entry embed">
        <header>
        {% if attributes != false %}
        <img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" />
        {% endif %}
        <h5 class="p-name embed">
        <a class="u-url" href="{{attachment.getRemoteUrl()}}">{{embed.getTitle() | escape}}</a>
        </h5>
        <div class="p-author embed">
        {% if embed.getAuthorName() is not null %}
        <div class="fn vcard author">
        {% if embed.getAuthorUrl() is null %}
        <p>{{embed.getAuthorName()}}</p>
        {% else %}
        <a href="{{embed.getAuthorUrl()}}" class="url">{{embed.getAuthorName()}}</a>
        {% endif %}
        </div>
        {% endif %}
        {% if embed.getProvider() is not null %}
        <div class="fn vcard">
        {% if embed.getProviderUrl() is null %}
        <p>{{embed.getProvider()}}</p>
        {% else %}
        <a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProvider()}}</a>
        {% endif %}
        </div>
        {% endif %}
        </div>
        </header>
        <div class="p-summary embed">
        {{ embed.getHtml() | escape }}
        </div>
        </article>
        END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachment]);

    return Event::stop;
}
/**
 * Check the host of a URL against the configured domain allowlist.
 *
 * The check only runs when `$this->check_allowlist` is truthy. That property
 * is not declared in this class (presumably it is filled in from the plugin
 * configuration; confirm), so an unset value counts as disabled.
 *
 * Each key of `$this->domain_allowlist` is used as a regular expression,
 * wrapped in `/` delimiters, and matched against the host; the value of the
 * first matching entry is returned.
 *
 * @param string $url the URL whose host is checked
 *
 * @return bool|string false when no check was made, the provider name of the
 *                     first matching allowlist entry otherwise
 *
 * @throws ServerException if the check is made and no entry matches
 */
protected function checkAllowlist(string $url): string|bool
{
    // Skip the check unless it was switched on; the previous condition was
    // inverted and reported "no check made" exactly when checking was enabled
    if (!($this->check_allowlist ?? false)) {
        return false; // indicates "no check made"
    }

    // parse_url() gives null/false when there is no host; preg_match() needs a string
    $host = (string) parse_url($url, PHP_URL_HOST);
    foreach ($this->domain_allowlist as $regex => $provider) {
        if (preg_match("/{$regex}/", $host) === 1) {
            return $provider; // we trust this source, return provider name
        }
    }

    // NOTE(review): ServerException is not among this file's `use` statements,
    // so it resolves relative to the Plugin\Embed namespace; confirm the
    // intended class and import it.
    throw new ServerException(_m('Domain not in remote thumbnail source allowlist: {host}', ['host' => $host]));
}
/**
 * Determine the size of a remote file from its `content-length` header.
 *
 * When no headers are supplied, $url is validated and a HEAD request is made
 * to obtain them. This isn't 100% foolproof, since the value is whatever the
 * remote server reports, but it is reliable enough for our purposes.
 *
 * @param string     $url     the remote URL, only requested when $headers is null
 * @param null|array $headers lower-cased response headers, if a request was already made
 *
 * @return null|int the reported size in bytes, or null when the URL is
 *                  invalid, the header is missing or not a plain number, or
 *                  the request failed
 */
private function getRemoteFileSize(string $url, ?array $headers = null): ?int
{
    try {
        if ($headers === null) {
            if (!Common::isValidHttpUrl($url)) {
                Log::error('Invalid URL in Embed::getRemoteFileSize()');
                return null;
            }
            $head    = HTTPClient::head($url);
            $headers = array_change_key_case($head->getHeaders(), CASE_LOWER);
        }

        $length = $headers['content-length'][0] ?? null;
        // Header values arrive as strings; only accept a plain run of digits
        // so the declared ?int return type is always honoured
        if ($length === null || !ctype_digit((string) $length)) {
            return null;
        }

        return (int) $length;
    } catch (Exception $e) {
        Log::error($e);
        return null;
    }
}
/**
 * Tell whether a remote URL points to an image, judging by its `content-type` header.
 *
 * When no headers are supplied, $url is validated and a HEAD request is made
 * to obtain them. The decision is based on the major part of the first
 * `content-type` value being `image`.
 *
 * @param string     $url     the remote URL, only requested when $headers is null
 * @param null|array $headers lower-cased response headers, if a request was already made
 *
 * @return bool true if the remote URL is an image; false otherwise, including
 *              when the URL is invalid or the request failed
 */
private function isRemoteImage(string $url, ?array $headers = null): bool
{
    try {
        if ($headers === null) {
            if (!Common::isValidHttpUrl($url)) {
                Log::error('Invalid URL in Embed::isRemoteImage()');
                return false;
            }
            $head    = HTTPClient::head($url);
            $headers = array_change_key_case($head->getHeaders(), CASE_LOWER);
        }

        return !empty($headers['content-type'])
            && GSFile::mimetypeMajor($headers['content-type'][0]) === 'image';
    } catch (Exception $e) {
        Log::error($e);
        return false;
    }
}
/**
 * Write image data to a temporary file and hand it to the hashing and resizing events.
 *
 * The data is written to a TemporaryFile, the `HashFile` event is dispatched
 * to obtain a hash, and the `ResizeImagePath` event is dispatched with a
 * destination under `<storage dir>embed/<hash><thumbnail extension>` and the
 * configured embed width, height and smart crop settings. Width, height and
 * mimetype are passed by reference, so the returned values are whatever the
 * event handlers left in them.
 *
 * @param mixed      $imgData the image data to write
 * @param null|array $headers headers of a previous request for the image, if any;
 *                            only `content-disposition` is read, to recover the original file name
 *
 * @return array [file path, width, height, original name or null, mimetype]
 */
protected function validateAndWriteImage($imgData, ?array $headers = null): array
{
    $temp = new TemporaryFile();
    $temp->write($imgData);

    Event::handle('HashFile', [$temp->getRealPath(), &$hash]);

    $destination = Common::config('storage', 'dir') . "embed/{$hash}" . Common::config('thumbnail', 'extension');
    $width       = Common::config('plugin_embed', 'width');
    $height      = Common::config('plugin_embed', 'height');
    $smart_crop  = Common::config('plugin_embed', 'smart_crop');

    Event::handle('ResizeImagePath', [$temp->getRealPath(), $destination, &$width, &$height, $smart_crop, &$mimetype]);

    // The temporary file is no longer needed once the event has been handled
    unset($temp);

    $original_name = null;
    if ($headers !== null && array_key_exists('content-disposition', $headers)) {
        $found = [];
        if (preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'][0], $found) === 1) {
            $original_name = $found[1];
        }
    }

    return [$destination, $width, $height, $original_name, $mimetype];
}
/**
 * Fetch, validate and write the image found at $media_url.
 *
 * `file://` URLs are inspected in place with getimagesize(). Any other URL is
 * checked against the allowlist, probed with a HEAD request (it must be an
 * image no larger than the `attachments.file_quota` setting), downloaded and
 * passed to validateAndWriteImage().
 *
 * @param Attachment $attachment the attachment the image belongs to
 * @param string     $media_url  URL for the actual media representation
 *
 * @return array|bool [file path, width, height, original name, mimetype],
 *                    or false when the URL does not lead to an image
 *
 * @throws Exception when a file already exists for the attachment, the
 *                   reported size exceeds the quota, or a request fails
 */
protected function fetchValidateWriteRemoteImage(Attachment $attachment, string $media_url): array|bool
{
    if ($attachment->hasFilename() && file_exists($attachment->getPath())) {
        // NOTE(review): AlreadyFulfilledException (and UnsupportedMediaException
        // below) are not among this file's `use` statements; confirm and import.
        throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->getId()]));
    }

    if (Formatting::startsWith($media_url, 'file://')) {
        // NOTE(review): $media_url can come from remote page metadata (see
        // getEmbed()); confirm that accepting local paths here is intended.
        $filepath = Formatting::removePrefix($media_url, 'file://');
        $info     = getimagesize($filepath);
        if ($info === false) {
            // Not something getimagesize() recognises as an image
            return false;
        }

        return [$filepath, $info[0], $info[1], basename($filepath), $info['mime']];
    }

    $this->checkAllowlist($media_url);
    $head    = HTTPClient::head($media_url);
    $headers = array_change_key_case($head->getHeaders(), CASE_LOWER);

    try {
        if (!$this->isRemoteImage($media_url, $headers)) {
            return false;
        }
        $file_size = $this->getRemoteFileSize($media_url, $headers);
        $max_size  = Common::config('attachments', 'file_quota');
        // An unknown size (null/false/0) cannot be compared, so it is let through
        if ($file_size && $file_size > $max_size) {
            throw new \Exception("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted.");
        }
    } catch (Exception $err) {
        Log::debug('Could not determine size of remote image, aborted local storage.');
        throw $err;
    }

    // First we download the file to memory and test whether it's actually an image file
    Log::debug('Downloading remote thumbnail for file id==' . $attachment->getId() . " with thumbnail URL: {$media_url}");
    try {
        $imgData = HTTPClient::get($media_url)->getContent();
        if ($imgData === null || $imgData === '') {
            throw new UnsupportedMediaException(_m('HTTPClient returned an empty result'));
        }

        return $this->validateAndWriteImage($imgData, $headers);
    } catch (UnsupportedMediaException $e) {
        // Couldn't find anything that looks like an image, nothing to do
        Log::debug($e);
        return false;
    }
}
/**
 * Look up embed metadata for $url with the 'oscarotero/Embed' library.
 *
 * Title, description, author and provider details are copied into the
 * result. When the page advertises an image, it is stored (inline `data:`
 * images are decoded, anything else is fetched) and its dimensions,
 * mimetype, URL and file name are added.
 *
 * Exceptions raised during the lookup are logged and not propagated; the
 * metadata gathered up to that point (possibly none) is returned. The
 * attachment's title is only updated when the lookup got as far as reading
 * a title.
 *
 * @param string     $url        the remote URL to inspect
 * @param Attachment $attachment the attachment the metadata is for
 *
 * @return array the collected metadata, after normalize()
 */
public function getEmbed(string $url, Attachment $attachment): array
{
    Log::info('Checking for remote URL metadata for ' . $url);

    // Start from an empty set so a failing lookup still leaves a valid array
    $metadata = [];
    try {
        Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
        $embed = new LibEmbed();
        $info  = $embed->get($url);

        $metadata['title']         = $info->title;
        $metadata['html']          = $info->description;
        $metadata['author_name']   = $info->authorName;
        $metadata['author_url']    = $info->authorUrl;
        $metadata['provider_name'] = $info->providerName;
        $metadata['provider_url']  = $info->providerUrl;

        if (!is_null($info->image)) {
            $image_url = (string) $info->image;
            $image     = false;
            if (Formatting::startsWith($image_url, 'data:')) {
                // Inline image: only the base64 payload form can be decoded here
                $marker = stripos($image_url, 'base64,');
                if ($marker !== false) {
                    $imgData = base64_decode(substr($image_url, $marker + 7));
                    $image   = $this->validateAndWriteImage($imgData);
                }
            } else {
                $image = $this->fetchValidateWriteRemoteImage($attachment, $image_url);
            }

            // The helpers answer false when no usable image was found
            if (is_array($image)) {
                [$filepath, $width, $height, , $mimetype] = $image;

                $metadata['width']     = $width;
                $metadata['height']    = $height;
                $metadata['mimetype']  = $mimetype;
                $metadata['media_url'] = $image_url;
                $metadata['filename']  = Formatting::removePrefix($filepath, Common::config('storage', 'dir'));
            }
        }
    } catch (Exception $e) {
        Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . $e->getMessage());
    }

    $metadata = self::normalize($metadata);
    if (array_key_exists('title', $metadata)) {
        $attachment->setTitle($metadata['title']);
    }

    return $metadata;
}
/**
 * Normalize fetched info.
 *
 * Only acts when $data has a non-null `url` entry:
 *  - a `url` starting with `/` gets scheme and host prepended, provided
 *    parse_url() can find both in it;
 *  - the known blank WordPress placeholder image is replaced with null;
 *  - a missing `width` makes both `width` and `height` fall back to the
 *    `plugin_embed` configuration.
 *
 * @param array $data the fetched metadata
 *
 * @return array the normalized metadata
 */
public static function normalize(array $data): array
{
    if (isset($data['url'])) {
        // sometimes sites serve the path, not the full URL, for images
        // let's "be liberal in what you accept from others"!
        // add protocol and host if the url starts with /
        if (is_string($data['url']) && $data['url'] !== '' && $data['url'][0] === '/') {
            $parsed = parse_url($data['url']);
            // NOTE(review): a URL that starts with '/' normally carries no
            // scheme of its own, so this likely needs a base URL (e.g. that
            // of the page) to resolve against; confirm the intended source.
            if (is_array($parsed) && isset($parsed['scheme'], $parsed['host'])) {
                $data['url'] = "{$parsed['scheme']}://{$parsed['host']}{$data['url']}";
            }
        }

        // Some wordpress opengraph implementations sometimes return a white blank image
        // no need for us to save that!
        if ($data['url'] === 'https://s0.wp.com/i/blank.jpg') {
            $data['url'] = null;
        }

        if (!isset($data['width'])) {
            $data['width']  = Common::config('plugin_embed', 'width');
            $data['height'] = Common::config('plugin_embed', 'height');
        }
    }

    return $data;
}
- }
|