Tag.php 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. <?php
  2. declare(strict_types = 1);
  3. // {{{ License
  4. // This file is part of GNU social - https://www.gnu.org/software/social
  5. //
  6. // GNU social is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // GNU social is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  18. // }}}
  19. namespace Component\Tag;
  20. use App\Core\Cache;
  21. use App\Core\DB\DB;
  22. use App\Core\Event;
  23. use function App\Core\I18n\_m;
  24. use App\Core\Modules\Component;
  25. use App\Core\Router\Router;
  26. use App\Entity\Actor;
  27. use App\Entity\Note;
  28. use App\Util\Common;
  29. use App\Util\Exception\ClientException;
  30. use App\Util\Formatting;
  31. use App\Util\Functional as GSF;
  32. use App\Util\HTML;
  33. use Component\Circle\Entity\ActorTag;
  34. use Component\Language\Entity\Language;
  35. use Component\Tag\Entity\NoteTag;
  36. use Doctrine\Common\Collections\ExpressionBuilder;
  37. use Doctrine\ORM\Query\Expr;
  38. use Doctrine\ORM\QueryBuilder;
  39. use Functional as F;
  40. use Symfony\Component\Form\Extension\Core\Type\CheckboxType;
  41. use Symfony\Component\HttpFoundation\Request;
  42. /**
  43. * Component responsible for extracting tags from posted notes, as well as normalizing them
  44. *
  45. * @author Hugo Sales <hugo@hsal.es>
  46. * @author Diogo Peralta Cordeiro <@diogo.site>
  47. * @copyright 2021 Free Software Foundation, Inc http://www.fsf.org
  48. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  49. */
  50. class Tag extends Component
  51. {
  /** Maximum number of characters kept for a tag; ensureLength() truncates to this. */
  public const MAX_TAG_LENGTH = 64;

  /**
   * Matches a hashtag at the start of the text or right after whitespace.
   * Group 1 is the leading boundary (empty or a whitespace character), group 2 is the
   * '#' followed by 1 to MAX_TAG_LENGTH letters, digits, '_' or '-'.
   * The length bound is derived from MAX_TAG_LENGTH so the two can not drift apart.
   *
   * Origin: Brion Vibber 2011-02-23 v2:classes/Notice.php:367 function saveTags
   */
  public const TAG_REGEX = '/(^|\\s)(#[\\pL\\pN_\\-]{1,' . self::MAX_TAG_LENGTH . '})/u';

  /** Route requirement for a tag placed in a URL: 1 to MAX_TAG_LENGTH ASCII alphanumerics. */
  public const TAG_SLUG_REGEX = '[A-Za-z0-9]{1,' . self::MAX_TAG_LENGTH . '}';
  /**
   * Register the routes of the single-tag and multi-tag note feeds.
   *
   * @param mixed $r route loader; only its connect() method is used here
   *
   * @return bool always Event::next
   */
  public function onAddRoute($r): bool
  {
      $slug       = self::TAG_SLUG_REGEX;
      $controller = Controller\Tag::class;

      // Exactly one tag slug
      $r->connect('single_note_tag', '/note-tag/{tag<' . $slug . '>}', [$controller, 'single_note_tag']);
      // Two or more tag slugs separated by commas
      $r->connect('multi_note_tags', '/note-tags/{tags<(' . $slug . ',)+' . $slug . '>}', [$controller, 'multi_note_tags']);

      return Event::next;
  }
  /**
   * Extract the hashtags present in a note's content and persist one NoteTag per distinct tag.
   *
   * Does nothing when $extra_args['TagProcessed'] is truthy. For every stored tag the note is
   * pushed onto the "tag-<canonical>" cache list and the feed cache keys of that canonical tag
   * are deleted.
   *
   * @param array $extra_args reads 'TagProcessed' and 'tag_use_canonical' (defaults to false)
   *
   * @return bool always Event::next
   */
  public function onProcessNoteContent(Note $note, string $content, string $content_type, array $extra_args): bool
  {
      if ($extra_args['TagProcessed'] ?? false) {
          return Event::next;
      }

      // XXX: We remove <span> because when content is in html the tag comes as #<span>hashtag</span>
      $content = str_replace('<span>', '', $content);

      $matches = [];
      preg_match_all(self::TAG_REGEX, $content, $matches, \PREG_SET_ORDER);
      // Group 2 of every match is the hashtag including its leading '#'
      $hashtags = array_unique(array_column($matches, 2));

      foreach ($hashtags as $hashtag) {
          $tag = self::extract($hashtag);
          if (!self::validate($tag)) {
              continue; // Ignore invalid tag candidates
          }

          $lang_id = $note->getLanguageId();
          $locale  = null;
          if (!\is_null($lang_id)) {
              $locale = Language::getById($lang_id)->getLocale();
          }
          $canonical_tag = self::canonicalTag($tag, $locale);

          $note_tag = NoteTag::create([
              'tag'           => $tag,
              'canonical'     => $canonical_tag,
              'note_id'       => $note->getId(),
              'use_canonical' => $extra_args['tag_use_canonical'] ?? false,
              'language_id'   => $lang_id,
          ]);
          DB::persist($note_tag);

          Cache::pushList("tag-{$canonical_tag}", $note);
          // Drop the cached feeds of this tag
          foreach (self::cacheKeys($canonical_tag) as $cache_key) {
              Cache::delete($cache_key);
          }
      }

      return Event::next;
  }
  /**
   * Replace every hashtag found in $text with a link to that tag's note feed.
   *
   * @param string      $text   plain text content, rewritten in place
   * @param null|string $locale when given, it is added to the generated tag URLs
   *
   * @return bool always Event::next
   */
  public function onRenderPlainTextNoteContent(string &$text, ?string $locale = null): bool
  {
      $rendered = preg_replace_callback(
          self::TAG_REGEX,
          // $m[1] is the boundary before the tag (kept as is), $m[2] the hashtag itself
          fn ($m) => $m[1] . self::tagLink($m[2], $locale),
          $text,
      );

      // preg_replace_callback() returns null on a PCRE error (e.g. invalid UTF-8 with the
      // /u modifier); keep the original text rather than handing null back to the caller
      if (!\is_null($rendered)) {
          $text = $rendered;
      }

      return Event::next;
  }
  /**
   * Build the names of the feed cache entries associated with a tag (or tag list).
   *
   * @return array<string, string> keys 'note_single', 'note_multi', 'actor_single', 'actor_multi'
   */
  public static function cacheKeys(string $tag_single_or_multi): array
  {
      $suffix = '-feed-' . $tag_single_or_multi;

      $keys                 = [];
      $keys['note_single']  = 'note-tag' . $suffix;
      $keys['note_multi']   = 'note-tags' . $suffix;
      $keys['actor_single'] = 'actor-tag' . $suffix;
      $keys['actor_multi']  = 'actor-tags' . $suffix;

      return $keys;
  }
  /**
   * Render a hashtag as HTML: a span of class 'tag' holding '#' and a link to the tag's note feed.
   *
   * @param string      $tag    the tag, with or without its leading '#'
   * @param null|string $locale when not null, passed to the route as the 'locale' parameter
   */
  private static function tagLink(string $tag, ?string $locale): string
  {
      $tag = self::extract($tag);

      $route_params = ['tag' => $tag];
      if (!\is_null($locale)) {
          $route_params['locale'] = $locale;
      }
      $url = Router::url('single_note_tag', $route_params);

      $anchor = HTML::html(
          ['a' => [
              'attrs' => [
                  'href' => $url,
                  'rel'  => 'tag', // https://microformats.org/wiki/rel-tag
              ],
              $tag,
          ]],
          options: ['indent' => false],
      );

      // The '#' stays outside the link; the already rendered anchor is embedded with 'raw'
      return HTML::html(
          ['span' => [
              'attrs' => ['class' => 'tag'],
              '#' . $anchor,
          ]],
          options: ['indent' => false, 'raw' => true],
      );
  }
  /**
   * Strip the '#' prefix from a tag and truncate the result to MAX_TAG_LENGTH characters.
   */
  public static function extract(string $tag): string
  {
      $without_prefix = Formatting::removePrefix($tag, '#');

      return self::ensureLength($without_prefix);
  }
  /**
   * Tell whether TAG_REGEX matches the given tag once a '#' is prepended to it.
   *
   * NOTE(review): TAG_REGEX has no end anchor, so this confirms that a well-formed hashtag
   * is found in '#' . $tag, not that every character of $tag is allowed - confirm intended.
   *
   * @param string $tag the tag without its leading '#'
   */
  public static function validate(string $tag): bool
  {
      $candidate = '#' . $tag;
      $matches   = preg_match(self::TAG_REGEX, $candidate);

      return $matches === 1;
  }
  /**
   * Normalise a tag with extract() and make sure the result passes validate().
   *
   * @throws ClientException when the extracted tag is not valid
   *
   * @return string the extracted tag
   */
  public static function sanitize(string $tag): string
  {
      $tag = self::extract($tag);

      if (self::validate($tag)) {
          return $tag;
      }

      throw new ClientException(_m('Invalid tag given: {tag}', ['{tag}' => $tag]));
  }
  /**
   * Truncate a tag to at most MAX_TAG_LENGTH characters (multibyte aware).
   */
  public static function ensureLength(string $tag): string
  {
      return mb_substr($tag, start: 0, length: self::MAX_TAG_LENGTH);
  }
  /**
   * Compute the canonical form of a tag.
   *
   * Every '#' is removed, the tag is split into words and each word is slugified. When a
   * language is given, the 'StemWord' event is fired for each word first; if a handler stops
   * the event, the value it produced is slugified instead of the word. The concatenation is
   * truncated to MAX_TAG_LENGTH.
   *
   * @param null|string $language passed to the 'StemWord' event; null skips stemming
   */
  public static function canonicalTag(string $tag, ?string $language = null): string
  {
      $slugs = [];

      foreach (Formatting::splitWords(str_replace('#', '', $tag)) as $word) {
          $stem = null;
          // Only a handler that stops the event is considered to have produced a stem
          $was_stemmed = !\is_null($language)
              && Event::handle('StemWord', [$language, $word, &$stem]) === Event::stop;

          $slugs[] = Formatting::slugify($was_stemmed ? $stem : $word);
      }

      return self::ensureLength(implode('', $slugs));
  }
  /**
   * Populate $note_expr and/or $actor_expr with an expression matching a tag, if the term
   * looks like a tag search ("<prefix>:#<tag>").
   *
   * The prefix of $term selects what is matched:
   *  - 'note:' or 'tag:'       only notes, by canonical note tag
   *  - 'people:' or 'actor:'   only actors, by canonical actor tag
   *  - (people|actor)(-|_)(circle|list): per the prefixes produced by GSF::cartesianProduct;
   *    both expressions match circles with that tag (raw or canonical) whose tagger is
   *    null or, when an actor is given, that actor
   *  - anything else           both expressions are set and the event is NOT stopped
   *
   * @param null|string $locale used for canonicalisation; defaults to Common::currentLanguage()
   * @param null|Actor  $actor  actor performing the search, if any
   * @param mixed       $note_expr  output: expression for the note query
   * @param mixed       $actor_expr output: expression for the actor query
   *
   * @throws ClientException (from sanitize()) when the tag in the term is invalid
   *
   * @return bool Event::stop when a recognised prefix was handled, Event::next otherwise
   */
  public function onCollectionQueryCreateExpression(ExpressionBuilder $eb, string $term, ?string $locale, ?Actor $actor, &$note_expr, &$actor_expr): bool
  {
      if (!str_contains($term, ':')) {
          return Event::next;
      }
      if (\is_null($locale)) {
          $locale = Common::currentLanguage();
      }

      // Only the value after the first ':' is needed; the prefix is checked against $term below
      [, $search_term] = explode(':', $term);
      if (!str_starts_with($search_term, '#')) {
          return Event::next;
      }

      $search_term           = self::sanitize($search_term);
      $canonical_search_term = self::canonicalTag($search_term, $locale);
      $temp_note_expr        = $eb->eq('note_tag.canonical', $canonical_search_term);
      $temp_actor_expr       = $eb->eq('actor_tag.canonical', $canonical_search_term);

      // 'people:' belongs to the actor branch only; listing it here as well made that
      // branch unreachable for it and turned a people search into a note search
      if (Formatting::startsWith($term, ['note:', 'tag:'])) {
          $note_expr = $temp_note_expr;
      } elseif (Formatting::startsWith($term, ['people:', 'actor:'])) {
          $actor_expr = $temp_actor_expr;
      } elseif (Formatting::startsWith($term, GSF::cartesianProduct([['people', 'actor'], ['circle', 'list'], [':']], separator: ['-', '_']))) {
          $null_tagger_expr = $eb->isNull('actor_circle.tagger');
          // Guard on $actor (the value dereferenced), not on $actor_expr: $actor is nullable
          $tagger_expr = \is_null($actor)
              ? $null_tagger_expr
              : $eb->orX($null_tagger_expr, $eb->eq('actor_circle.tagger', $actor->getId()));
          // array_unique() keeps the first occurrence, so index 0 always exists
          $tags     = array_unique([$search_term, $canonical_search_term]);
          $tag_expr = \count($tags) === 1
              ? $eb->eq('actor_circle.tag', $tags[0])
              : $eb->in('actor_circle.tag', $tags);
          $search_expr = $eb->andX(
              $tagger_expr,
              $tag_expr,
          );
          $note_expr  = $search_expr;
          $actor_expr = $search_expr;
      } else {
          // Unrecognised prefix: offer both expressions but let other handlers run too
          $note_expr  = $temp_note_expr;
          $actor_expr = $temp_actor_expr;
          return Event::next;
      }

      return Event::stop;
  }
  /**
   * Left join the tag tables used by the tag search expressions onto both collection queries.
   *
   * @return bool always Event::next
   */
  public function onCollectionQueryAddJoins(QueryBuilder &$note_qb, QueryBuilder &$actor_qb): bool
  {
      $join_type = Expr\Join::WITH;

      // Notes are matched to their tags through note_tag.note_id
      $note_qb->leftJoin(NoteTag::class, 'note_tag', $join_type, 'note_tag.note_id = note.id');
      // Actors are matched through actor_tag.tagger
      $actor_qb->leftJoin(ActorTag::class, 'actor_tag', $join_type, 'actor_tag.tagger = actor.id');

      return Event::next;
  }
  /**
   * Append the 'tag_use_canonical' checkbox (optional, ticked by default) to the posting form.
   *
   * @param array $form_params list of form field definitions, extended in place
   *
   * @return bool always Event::next
   */
  public function onPostingAddFormEntries(Request $request, Actor $actor, array &$form_params): bool
  {
      $field_options = [
          'required' => false,
          'data'     => true,
          'label'    => _m('Make note tags canonical'),
          'help'     => _m('Canonical tags will be treated as a version of an existing tag with the same root/stem (e.g. \'#great_tag\' will be considered as a version of \'#great\', if it already exists)'),
      ];

      $form_params[] = ['tag_use_canonical', CheckboxType::class, $field_options];

      return Event::next;
  }
  /**
   * Copy the 'tag_use_canonical' preference from the submitted form data into $extra_args.
   *
   * @param array $data       submitted form data
   * @param array $extra_args extended in place with 'tag_use_canonical'
   *
   * @throws ClientException when $data has no (non-null) 'tag_use_canonical' entry
   *
   * @return bool always Event::next
   */
  public function onAddExtraArgsToNoteContent(Request $request, Actor $actor, array $data, array &$extra_args): bool
  {
      // `??` rejects exactly what isset() rejects: a missing key or a null value
      $extra_args['tag_use_canonical'] = $data['tag_use_canonical']
          ?? throw new ClientException(_m('Missing Use Canonical preference for Tags.'));

      return Event::next;
  }
  221. }