tags.pl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. use strict;
  2. use v5.10;
  3. =encoding utf8
  4. =head1 NAME
  5. tags - an Oddmuse module that implements tagging of pages and
  6. searching for tagged pages
  7. =head1 SYNOPSIS
  8. This module recognises the pattern C<[[tag:foo]]> on a page and will
  9. render this as a link to all pages tagged foo, as well as a link to
  10. the RSS feed for all pages tagged foo.
  11. Alternatively, the pattern C<[[tag:foo|bar]]> is also recognized. The
  12. only difference is that this will look like a link to bar instead of
  13. foo.
  14. When searching for a term of the form C<tag:foo> the term "foo" be
  15. searched in a separate tag index, making it much faster.
  16. You can also negate this particular form by using C<-tag:foo>.
  17. These searches will also work for Journal Pages, Recent Changes, and
  18. RSS feed.
  19. =head1 INSTALLATION
  20. Installing a module is easy: Create a modules subdirectory in your
  21. data directory, and put the Perl file in there. It will be loaded
  22. automatically.
  23. =cut
  24. AddModuleDescription('tags.pl', 'Tagging Extension');
  25. =head1 CONFIGURATION
  26. =head2 $TagUrl and $TagFeed
  27. These variable will be used to link the tags. By default, they will
  28. point at the wiki itself, using C<$ScriptName>. They use C<%s> as a
  29. placeholder for the tag.
  30. Example:
  31. $TagUrl = 'http://technorati.com/tag/%s';
  32. $TagFeed = 'http://feeds.technorati.com/tag/%s';
  33. By default, these two will point to the list of recent changes,
  34. filtered by the appropriate tag, formatted as HTML or RSS
  35. respectively.
  36. =head2 $TagFeedIcon
  37. This variable should point to an RSS icon. You can get one from
  38. L<http://www.feedicons.com/>, for example.
  39. Example:
  40. $TagFeedIcon = 'http://www.example.org/pics/rss.png';
  41. =cut
  42. our ($q, $Now, %Action, %Page, $FreeLinkPattern, @MyInitVariables, @MyRules, @MyAdminCode, $DataDir, $ScriptName);
  43. our ($TagUrl, $TagFeed, $TagFeedIcon, $TagFile);
  44. push(@MyInitVariables, \&TagsInit);
  45. sub TagsInit {
  46. $TagUrl = ScriptUrl('action=rc;rcfilteronly=tag:%s') unless $TagUrl;
  47. $TagFeed = ScriptUrl('action=rss;rcfilteronly=tag:%s') unless $TagFeed;
  48. $TagFile = "$DataDir/tag.db";
  49. }
  50. sub TagsGetLink {
  51. my ($url, $id) = @_;
  52. $id = UrlEncode($id);
  53. $url =~ s/\%s/$id/g or $url .= $id;
  54. return $url;
  55. }
  56. sub TagReadHash {
  57. require Storable;
  58. return %{ Storable::retrieve(encode_utf8($TagFile)) } if IsFile($TagFile);
  59. }
  60. # returns undef if encountering an error
  61. sub TagWriteHash {
  62. my $h = shift;
  63. require Storable;
  64. return Storable::store($h, encode_utf8($TagFile));
  65. }
  66. push(@MyRules, \&TagsRule);
  67. sub TagsRule {
  68. if (m/\G(\[\[tag:$FreeLinkPattern\]\])/cg
  69. or m/\G(\[\[tag:$FreeLinkPattern\|([^]|]+)\]\])/cg) {
  70. # [[tag:Free Link]], [[tag:Free Link|alt text]]
  71. my ($tag, $text) = ($2, $3);
  72. my $html = $q->a({-href=>TagsGetLink($TagUrl, $tag),
  73. -class=>'outside tag',
  74. -title=>T('Tag'),
  75. -rel=>'tag'
  76. }, $text || $tag);
  77. if ($TagFeedIcon) {
  78. $html .= ' ' . $q->a({-href=>TagsGetLink($TagFeed, $tag),
  79. -class=>'feed tag',
  80. -title=>T('Feed for this tag'),
  81. -rel=>'feed'
  82. }, $q->img({-src=>$TagFeedIcon,
  83. -alt=>T('RSS')}));
  84. }
  85. return $html;
  86. }
  87. return;
  88. }
  89. =pod
  90. When saving, a tags db is written to disk. If it doesn't exist, it
  91. will be regenerated.
  92. =cut
  93. *OldTagSave = \&Save;
  94. *Save = \&NewTagSave;
  95. sub NewTagSave { # called within a lock!
  96. OldTagSave(@_);
  97. my $id = shift;
  98. # Within a tag, space is replaced by _ as in foo_bar.
  99. my %tag = map { lc(FreeToNormal($_)) => 1 }
  100. ($Page{text} =~ m/\[\[tag:$FreeLinkPattern\]\]/g,
  101. $Page{text} =~ m/\[\[tag:$FreeLinkPattern\|([^]|]+)\]\]/g);
  102. # open the DB file
  103. my %h = TagReadHash();
  104. # For each tag we list the files tagged. Add the current file for
  105. # all those tags where it is missing.
  106. foreach my $tag (keys %tag) {
  107. my %file = map {$_=>1} @{$h{$tag}};
  108. if (not $file{$id}) {
  109. $file{$id} = 1;
  110. $h{$tag} = [keys %file];
  111. }
  112. }
  113. # For each file in our hash, we have a reverse lookup of all the
  114. # tags used. This allows us to delete the references that no longer
  115. # show up without looping through them all. The files are indexed
  116. # with a starting underscore because this is an illegal tag name.
  117. foreach my $tag (@{$h{"_$id"}}) {
  118. # If the tag we're looking at is no longer listed, we have work to
  119. # do.
  120. if (!$tag{$tag}) {
  121. my %file = map {$_=>1} @{$h{$tag}};
  122. delete $file{$id};
  123. if (%file) {
  124. $h{$tag} = [keys %file];
  125. } else {
  126. delete $h{$tag};
  127. }
  128. }
  129. }
  130. # Store the new reverse lookup of all the tags used on the current
  131. # page. If no more tags appear on this page, delete the entry.
  132. if (%tag) {
  133. $h{"_$id"} = [keys %tag];
  134. } else {
  135. delete $h{"_$id"};
  136. }
  137. TagWriteHash(\%h);
  138. }
  139. =pod
  140. When a page expires, the relevant pages and references have to be
  141. removed from the tags db.
  142. =cut
  143. *OldTagDeletePage = \&DeletePage;
  144. *DeletePage = \&NewTagDeletePage;
  145. sub NewTagDeletePage { # called within a lock!
  146. my $id = shift;
  147. # open the DB file
  148. my %h = TagReadHash();
  149. # For each file in our hash, we have a reverse lookup of all the
  150. # tags used. This allows us to delete the references that no longer
  151. # show up without looping through them all.
  152. foreach my $tag (@{$h{"_$id"}}) {
  153. my %file = map {$_=>1} @{$h{$tag}};
  154. delete $file{$id};
  155. if (%file) {
  156. $h{$tag} = [keys %file];
  157. } else {
  158. delete $h{$tag};
  159. }
  160. }
  161. # Delete reverse lookup entry.
  162. delete $h{"_$id"};
  163. TagWriteHash(\%h);
  164. # Return any error codes?
  165. return OldTagDeletePage($id, @_);
  166. }
  167. =pod
  168. When searching, the tags db is read and used. This works by scanning
  169. the search string for tag:foo and -tag:bar elements, searching for
  170. those, and then calling the grep filter code with the new list of
  171. pages and a new search term without the tag terms.
  172. =cut
  173. sub TagFind {
  174. my @tags = @_;
  175. # open the DB file
  176. my %h = TagReadHash();
  177. my %page;
  178. foreach my $tag (@tags) {
  179. foreach my $id (@{$h{lc($tag)}}) {
  180. $page{$id} = 1;
  181. }
  182. }
  183. my @result = sort keys %page;
  184. return @result;
  185. }
  186. *OldTagFiltered = \&Filtered;
  187. *Filtered = \&NewTagFiltered;
  188. sub NewTagFiltered { # called within a lock!
  189. my ($string, @pages) = @_;
  190. my %page = map { $_ => 1 } @pages;
  191. # looking at all the "tag:SOME TERMS" and and tag:TERM
  192. my @tagterms = map { FreeToNormal($_) } grep(/^-?tag:/, shift =~ /\"([^\"]+)\"|(\S+)/g);
  193. my @positives = map {substr($_, 4)} grep(/^tag:/, @tagterms);
  194. my @negatives = map {substr($_, 5)} grep(/^-tag:/, @tagterms);
  195. if (@positives) {
  196. my %found;
  197. foreach my $id (TagFind(@positives)) {
  198. $found{$id} = 1 if $page{$id};
  199. }
  200. %page = %found;
  201. }
  202. # remove the negatives
  203. foreach my $id (TagFind(@negatives)) {
  204. delete $page{$id};
  205. }
  206. # filter out the tags from the search string
  207. $string = join(' ', grep(!/^-?tag:/, $string =~ /\"([^\"]+)\"|(\S+)/g));
  208. # run the old code for any remaining search terms
  209. return OldTagFiltered($string, sort keys %page);
  210. }
  211. =pod
  212. There remains a problem: The real search code will still be in
  213. operation, and terms of the form -tag:foo will never match. That's why
  214. the code that does the ordinary search has to be changed as well.
  215. We're need to remove all tag terms (again) in order to not confuse it.
  216. =cut
  217. *OldTagSearchString = \&SearchString;
  218. *SearchString = \&NewTagSearchString;
  219. sub NewTagSearchString {
  220. # filter out the negative tags from the search string
  221. my $string = join(' ', map { NormalToFree($_) }
  222. grep(!/^-tag:/, shift =~ /\"([^\"]+)\"|(\S+)/g));
  223. return 1 unless $string;
  224. return OldTagSearchString($string, @_);
  225. }
  226. =pod
  227. We also want to provide a visual feedback of tag importance using a
  228. "tag cloud" -- larger font size means that a tag has been used more
  229. often.
  230. =cut
  231. $Action{tagcloud} = \&TagCloud;
  232. sub TagCloud {
  233. print GetHeader('', T('Tag Cloud'), ''),
  234. $q->start_div({-class=>'content cloud'}) . '<p>';
  235. # open the DB file
  236. my %h = TagReadHash();
  237. my $max = 0;
  238. my $min = 0;
  239. my %count = ();
  240. foreach my $tag (grep !/^_/, keys %h) {
  241. $count{$tag} = @{$h{$tag}};
  242. $max = $count{$tag} if $count{$tag} > $max;
  243. $min = $count{$tag} if not $min or $count{$tag} < $min;
  244. }
  245. foreach my $tag (sort keys %count) {
  246. my $n = $count{$tag};
  247. print $q->a({-href => "$ScriptName?search=tag:" . UrlEncode($tag),
  248. -title => $n,
  249. -style => 'font-size: '
  250. . int(80+120*($max == $min ? 1 : ($n-$min)/($max-$min)))
  251. . '%;',
  252. }, NormalToFree($tag)), ' ... ';
  253. }
  254. print '</p></div>';
  255. PrintFooter();
  256. }
  257. =pod
  258. Finally, we need to provide the means to reindex the entire site. The
  259. Reindex Action will do this. This should only be necessary when you
  260. install the module, and when you suspect that the tag.db is out of
  261. sync such as after a restoration from backup.
  262. Example:
  263. http://example.org/cgi-bin/wiki?action=reindex
  264. =cut
  265. $Action{reindex} = \&DoTagsReindex;
  266. sub DoTagsReindex {
  267. if (not UserIsAdmin()
  268. and IsFile($TagFile)
  269. and $Now - Modified($TagFile) < 0.5) {
  270. ReportError(T('Rebuilding index not done.'), '403 FORBIDDEN',
  271. 0, T('(Rebuilding the index can only be done once every 12 hours.)'));
  272. }
  273. # Request the main lock, because we want to prevent anybody from
  274. # saving while we are reindexing.
  275. RequestLockOrError();
  276. print GetHttpHeader('text/plain');
  277. # open the DB file
  278. require Storable;
  279. my %h = ();
  280. foreach my $id (AllPagesList()) {
  281. print "$id\n";
  282. OpenPage($id);
  283. my %tag = map { lc(FreeToNormal($_)) => 1 }
  284. ($Page{text} =~ m/\[\[tag:$FreeLinkPattern\]\]/g,
  285. $Page{text} =~ m/\[\[tag:$FreeLinkPattern\|([^]|]+)\]\]/g);
  286. next unless %tag;
  287. # For each tag we list the files tagged. Add the current file for
  288. # all tags.
  289. foreach my $tag (keys %tag) {
  290. push(@{$h{$tag}}, $id);
  291. }
  292. # Store the reverse lookup of all the tags used on the current
  293. # page.
  294. $h{"_$id"} = [keys %tag];
  295. }
  296. if (TagWriteHash(\%h)) {
  297. print "Saved tag file.\n";
  298. } else {
  299. print "Error saving tag file.\n";
  300. }
  301. ReleaseLock();
  302. }
  303. =pod
  304. If you want to debug the data structure, use the Tag List Action. All
  305. keys starting with an underscore are pagenames, the others are tags.
  306. Example:
  307. http://example.org/cgi-bin/wiki?action=taglist
  308. =cut
  309. $Action{taglist} = \&TagList;
  310. sub TagList {
  311. print GetHttpHeader('text/plain');
  312. # open the DB file
  313. my %h = TagReadHash();
  314. foreach my $id (sort keys %h) {
  315. print "$id: " . join(', ', @{$h{$id}}) . "\n";
  316. }
  317. TagWriteHash(\%h);
  318. }
  319. =pod
  320. Both these actions are of course available from the Administration
  321. menu.
  322. =cut
  323. push(@MyAdminCode, \&TagsMenu);
  324. sub TagsMenu {
  325. my ($id, $menuref, $restref) = @_;
  326. push(@$menuref,
  327. ScriptLink('action=reindex', T('Rebuild tag index'), 'reindex')
  328. . ', ' . ScriptLink('action=taglist', T('list tags'), 'taglist')
  329. . ', ' . ScriptLink('action=tagcloud', T('tag cloud'), 'tagcloud'));
  330. }
  331. =head1 COPYRIGHT AND LICENSE
  332. Copyright (C) 2005–2015 Alex Schroeder <alex@gnu.org>
  333. This program is free software; you can redistribute it and/or modify
  334. it under the terms of the GNU General Public License as published by
  335. the Free Software Foundation; either version 3 of the License, or (at
  336. your option) any later version.
  337. This program is distributed in the hope that it will be useful, but
  338. WITHOUT ANY WARRANTY; without even the implied warranty of
  339. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  340. General Public License for more details.
  341. You should have received a copy of the GNU General Public License
  342. along with this program. If not, see <http://www.gnu.org/licenses/>.
  343. =cut