localnames.pl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. # Copyright (C) 2004, 2005, 2007 Alex Schroeder <alex@emacswiki.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. use strict;
  16. use v5.10;
  17. AddModuleDescription('localnames.pl', 'Local Names Extension');
  18. our ($q, $Now, %Page, %Action, $OpenPageName, $ScriptName, $DataDir, $RssDir, @MyRules, @MyMaintenance, @MyInitVariables, $FullUrlPattern, $FreeLinkPattern, $CommentsPrefix, $UseCache, @UserGotoBarPages, %AdminPages, @MyAdminCode, @MyFooters, $UsePathInfo);
  19. =encoding utf8
  20. =head1 Local Names
  21. This module allows you to centrally define redirections. Thus you can
  22. define that whenever somebody links to the page Foo the link will
  23. point to http://example.com/. These redirects are defined on special
  24. page called LocalNames. You can change the name of that page by
  25. setting C<$LocalNamesPage>.
  26. You can also link to external lists of such redirections, as long as
  27. they use the namespace description format developed by Lion Kimbro.
  28. Basically you can "import" redirections. These external lists are
  29. cached in a directory called C<ln> inside the data directory. You can
  30. change the directory by setting C<$LnDir>.
  31. =cut
  32. our ($LocalNamesPage, %LocalNames, $LocalNamesCollect,
  33. $LocalNamesCollectMaxWords, $LnDir, $LnCacheHours,
  34. %WantedPages);
  35. $LocalNamesPage = 'LocalNames';
  36. $LocalNamesCollect = 0;
  37. $LocalNamesCollectMaxWords = 2;
  38. # LN caching is written very similar to the RSS file caching
  39. $LnDir = "$DataDir/ln";
  40. $LnCacheHours = 12;
  41. sub GetLnFile {
  42. return $LnDir . '/' . UrlEncode(shift);
  43. }
  44. =head2 Maintenance
  45. Whenever maintenance runs, all the cached external lists of
  46. redirections are deleted whenever they are older than twelve hours.
  47. You can change this expiry time by setting C<$LnCacheHours>.
  48. =cut
  49. push (@MyMaintenance, \&LnMaintenance);
  50. sub LnMaintenance {
  51. if (opendir(DIR, encode_utf8($RssDir))) { # cleanup if they should expire anyway
  52. foreach my $file (readdir(DIR)) {
  53. Unlink("$RssDir/$file") if $Now - Modified($file) > $LnCacheHours * 3600;
  54. }
  55. closedir DIR;
  56. }
  57. }
  58. =head2 Defining Local Names
  59. Local Names are defined on the LocalNames page.
  60. If you create ordinary named external links such as
  61. C<[http://ln.taoriver.net/ Local Names Website]> on the LocalNames
  62. page, you will have defined a new Local Name. If you write C<[[Local
  63. Names Website]]> elsewhere on the site (and the page does not exist),
  64. that link will point to the website you specified.
  65. You can link from the LocalNames page to existing namespace
  66. descriptions. These other namespace descriptions must use the
  67. namespace description format developed by Lion Kimbro. If you write
  68. C<[[ln:URL]]> or C<[[ln:URL text]]>, this will import all the Local
  69. Names defined there into your wiki.
  70. Example: C<[[ln:http://ln.taoriver.net/localnames.txt Lion's Example
  71. Localnames List]]>.
  72. Currently only LN records with absolute URLs are parsed correctly. All
  73. other record types are ignored.
  74. If you want to learn more about local names, see
  75. L<http://ln.taoriver.net/>.
  76. =cut
  77. # render [[ln:url]] as something clickable
  78. push(@MyRules, \&LocalNamesRule);
  79. sub LocalNamesRule {
  80. if (m/\G\[\[ln:$FullUrlPattern\s*([^\]]*)\]\]/cg) {
  81. # [[ln:url text]], [[ln:url]]
  82. return $q->a({-class=>'url outside ln', -href=>$1}, $2||$1);
  83. }
  84. return;
  85. }
  86. =head2 Initialization
  87. The LocalNames page is added to C<%AdminPages> so that the
  88. Administration page will list a link to it. The LocalNames page will
  89. be read and parsed for every request. The result is that the
  90. C<%LocalNames> hash has pagenames as keys and URLs to redirect to as
  91. values.
  92. If the LocalNames page refers to external lists of redirections, these
  93. will be read from the cache or fetched anew if older than twelve
  94. hours. If you use the cache=0 parameter in an URL or set C<$UseCache>
  95. to zero or less, Oddmuse will B<fetch the lists of redirections every
  96. single time>. Using the cache=0 parameter is a way to force Oddmuse to
  97. expire the cache. Setting C<$UseCache> to 0 should not be used on a
  98. live site.
  99. Definitions of redirections on the LocalNames take precedence over
  100. redirections defined on remote sites. Earlier lists of redirections
  101. take precedence over later lists.
  102. We ignore the spec at L<http://ln.taoriver.net/spec-1.2.html#Syntax>
  103. when considering what names we allow, since Oddmuse will parse them as
  104. regular links anyway.
  105. =cut
  106. push(@MyInitVariables, \&LocalNamesInit);
  107. sub LocalNamesInit {
  108. %WantedPages = (); # list of missing pages used during this request
  109. %LocalNames = ();
  110. $LocalNamesPage = FreeToNormal($LocalNamesPage); # spaces to underscores
  111. $AdminPages{$LocalNamesPage} = 1;
  112. my $data = GetPageContent($LocalNamesPage);
  113. while ($data =~ m/\[$FullUrlPattern\s+([^\]]+?)\]/g) {
  114. my ($page, $url) = ($2, $1);
  115. my $id = FreeToNormal($page);
  116. $LocalNames{$id} = $url;
  117. }
  118. # Now read data from ln links, checking cache if possible. For all
  119. # URLs not in the cache or with invalid cache, fetch the file again,
  120. # and save it in the cache.
  121. my @ln = $data =~ m/\[\[ln:$FullUrlPattern[^\]]*?\]\]/g;
  122. my %todo = map {$_, GetLnFile($_)} @ln;
  123. my %data = ();
  124. if (GetParam('cache', $UseCache) > 0) {
  125. foreach my $uri (keys %todo) { # read cached rss files if possible
  126. if ($Now - Modified($todo{$uri}) < $LnCacheHours * 3600) {
  127. $data{$uri} = ReadFile($todo{$uri});
  128. delete($todo{$uri}); # no need to fetch them below
  129. }
  130. }
  131. }
  132. my @need_cache = keys %todo;
  133. if (keys %todo > 1) { # try parallel access if available
  134. eval { # see code example in LWP::Parallel, not LWP::Parllel::UserAgent (no callbacks here)
  135. require LWP::Parallel::UserAgent;
  136. my $pua = LWP::Parallel::UserAgent->new();
  137. foreach my $uri (keys %todo) {
  138. if (my $res = $pua->register(HTTP::Request->new('GET', $uri))) {
  139. warn $res->error_as_HTML;
  140. }
  141. }
  142. %todo = (); # because the uris in the response may have changed due to redirects
  143. my $entries = $pua->wait();
  144. foreach (keys %$entries) {
  145. my $uri = $entries->{$_}->request->uri;
  146. $data{$uri} = $entries->{$_}->response->content;
  147. }
  148. }
  149. }
  150. foreach my $uri (keys %todo) { # default operation: synchronous fetching
  151. $data{$uri} = GetRaw($uri);
  152. }
  153. if (GetParam('cache', $UseCache) > 0) {
  154. CreateDir($LnDir);
  155. foreach my $uri (@need_cache) {
  156. WriteStringToFile(GetLnFile($uri), $data{$uri});
  157. }
  158. }
  159. # go through the urls in the right order, this time
  160. foreach my $ln (@ln) {
  161. my ($previous_type, $previous_url);
  162. foreach my $line (split(/[\r\n]+/, $data{$ln})) {
  163. if ($line =~ /^LN\s+"$FreeLinkPattern"\s+(?:"$FullUrlPattern"|\.)$/
  164. or $previous_type eq 'LN'
  165. and $line =~ /^\.\s+"$FreeLinkPattern"\s+(?:"$FullUrlPattern"|\.)$/) {
  166. my ($name, $url) = ($1, $2);
  167. $url = $previous_url if not $url and $previous_url;
  168. $previous_url = $url;
  169. $previous_type = 'LN';
  170. my $id = FreeToNormal($name);
  171. # Only store this, if not already stored!
  172. if (not $LocalNames{$id}) {
  173. $LocalNames{$id} = $url;
  174. }
  175. } else {
  176. $previous_type = undef;
  177. }
  178. # elsif ($line =~ /^NS "(.*)" "$FullUrlPattern"$/g) {
  179. # }
  180. }
  181. }
  182. }
  183. =head2 Name Resolution
  184. We want Near Links only to have an effect for pages that do not exist
  185. locally. It should not take precedence! Thus, we hook into
  186. C<ResolveId>; this function returns a list of four elements: CSS
  187. class, resolved id, title (eg. for popups), and a boolean saying
  188. whether the page exists or not. If the second element is empty, then
  189. no page exists and we check C<%LocalNames> for a match. If there is a
  190. match, we return the URL using the CSS class "near" and the title
  191. "LocalNames". The CSS class is the same that is used for Near Links
  192. because the effect is so similar.
  193. Note: Existing local pages take precedence over local names, but local
  194. names take precedence over Near Links.
  195. We also keep track of wanted pages (links to missing pages) so that we
  196. can printe a list of definition links at the bottom using the Define
  197. Action (see below).
  198. =cut
  199. *OldLocalNamesResolveId = \&ResolveId;
  200. *ResolveId = \&NewLocalNamesResolveId;
  201. sub NewLocalNamesResolveId {
  202. my $id = shift;
  203. my ($class, $resolved, @rest) = OldLocalNamesResolveId($id, @_);
  204. if ((not $resolved or $class eq 'near') and $LocalNames{$id}) {
  205. return ('near', $LocalNames{$id}, $LocalNamesPage);
  206. } else {
  207. $WantedPages{$id} = 1 if not $resolved; # this is provisional!
  208. return ($class, $resolved, @rest);
  209. }
  210. }
  211. =head2 Automatically Defining Local Names
  212. It is possible to have Oddmuse automatically define local names as you
  213. edit pages. In order to enable this, set C<$LocalNamesCollect> to 1.
  214. Once you this, every time you save a page with a named external link
  215. such as C<[http://www.emacswiki.org/alex/ Alex]>, this will add or
  216. update the corresponding entry on the LocalNames page.
  217. In order to reduce the number of entries thus collected, only external
  218. links with a name consisting of one or two words are used. You can
  219. change this word limit by setting C<$LocalNamesCollectMaxWords>.
  220. The default limit of two words assumes that you might want to make
  221. C<Alex> a link, or C<Alex Schroeder>, but not C<the example on Alex’s
  222. blog> (five “words”, since the code looks at whitespace only).
  223. =cut
  224. *LocalNamesOldSave = \&Save;
  225. *Save = \&LocalNamesNewSave;
  226. sub LocalNamesNewSave {
  227. LocalNamesOldSave(@_);
  228. my ($currentid, $text) = @_;
  229. # avoid recursion
  230. return if $currentid eq $LocalNamesPage or not $LocalNamesCollect;
  231. my $currentname = $currentid;
  232. $currentname =~ s/_/ /g;
  233. local ($OpenPageName, %Page);
  234. OpenPage($LocalNamesPage);
  235. my $localnames = $Page{text};
  236. my %map = ();
  237. while ($text =~ /\[$FullUrlPattern\s+(([^ \]]+?\s*){1,$LocalNamesCollectMaxWords})\]/g) {
  238. my ($page, $url) = ($2, $1);
  239. my $id = FreeToNormal($page);
  240. $map{$id} = () unless defined $map{$id};
  241. $map{$id}{$url} = 1;
  242. }
  243. my %collection = ();
  244. foreach my $id (keys %map) {
  245. # canonical form with trimmed spaces and no underlines
  246. my $page = $id;
  247. $page =~ s/_/ /g;
  248. # skip if the mapping from id to url already defined matches at
  249. # least one of the definitions on the current page.
  250. next if $map{$id}{$LocalNames{$id}};
  251. $collection{$page} = 1;
  252. # pick a random url from the list
  253. my @urls = keys %{$map{$id}};
  254. my $url = $urls[0];
  255. # if a different mapping exists already; change the old mapping to the new one
  256. # if the change fails (eg. the page name is not in canonical form), don't skip!
  257. next if $LocalNames{$id}
  258. and $localnames =~ s/\[$LocalNames{$id}\s+$page\]/[$url $page]/g;
  259. # add a new entry at the end
  260. $localnames .= "\n\n* [$url $page]"
  261. . Ts(" -- defined on %s", "[[$currentname]]");
  262. $LocalNames{$id} = $url; # prevent multiple additions
  263. }
  264. # minor change
  265. my @collection = sort keys %collection;
  266. Save($LocalNamesPage, $localnames,
  267. Tss("Local names defined on %1: %2", $currentname,
  268. length(@collection > 1)
  269. ? join(', and ',
  270. join(', ', @collection[0 .. $#collection-1]),
  271. $collection[-1])
  272. : @collection), 1)
  273. unless $localnames eq $Page{text};
  274. }
  275. =head2 Local Names Format
  276. The Ln Action lists all the local pages in the local names format
  277. defined in the specification. Example URL:
  278. C<http://localhost/cgi-bin/wiki?action=ln>.
  279. If you want to learn more about local names and the format used, see
  280. L<http://ln.taoriver.net/>.
  281. =cut
  282. $Action{ln} = \&DoLocalNames;
  283. sub DoLocalNames {
  284. print GetHttpHeader('text/plain');
  285. print "X VERSION 1.2\n";
  286. print "# Local Pages\n";
  287. foreach my $id (AllPagesList()) {
  288. my $title = $id;
  289. $title =~ s/_/ /g;
  290. my $url = $ScriptName . ($UsePathInfo ? '/' : '?') . $id;
  291. print qq{LN "$title" "$url"\n};
  292. }
  293. if (GetParam('expand', 0)) {
  294. print "# Local names defined on $LocalNamesPage:\n";
  295. my $data = GetPageContent($LocalNamesPage);
  296. while ($data =~ m/\[$FullUrlPattern\s+([^\]]+?)\]/g) {
  297. my ($title, $url) = ($2, $1);
  298. my $id = FreeToNormal($title);
  299. print qq{LN "$title" "$url"\n};
  300. }
  301. print "# Namespace delegations defined on $LocalNamesPage:\n";
  302. while ($data =~ m/\[\[ln:$FullUrlPattern([^\]]*)?\]\]/g) {
  303. my ($title, $url) = ($2, $1);
  304. my $id = FreeToNormal($title);
  305. print qq{NS "$title" "$url"\n};
  306. }
  307. } else {
  308. print "# Local names defined on $LocalNamesPage:\n";
  309. foreach my $id (keys %LocalNames) {
  310. my $title = $id;
  311. $title =~ s/_/ /g;
  312. print qq{LN "$title" "$LocalNames{$id}"\n};
  313. }
  314. }
  315. }
  316. =head2 Define Action
  317. The Define Action allows you to interactively add local names using a
  318. form. Example URL: C<http://localhost/cgi-bin/wiki?action=define>.
  319. You can also provide the C<name> and C<link> parameters yourself if
  320. you want to use this action from a script.
  321. As wanted pages (links to missing pages) come up, you will get links
  322. to appropriate define actions in your footer.
  323. =cut
  324. $Action{define} = \&DoDefine;
  325. sub DoDefine {
  326. if (GetParam('link', '') and GetParam('name', '')) {
  327. SetParam('title', $LocalNamesPage);
  328. SetParam('text', GetPageContent($LocalNamesPage) . "\n* ["
  329. . GetParam('link', '') . ' ' . GetParam('name', '')
  330. . "]\n");
  331. SetParam('summary', 'Defined ' . GetParam('name'));
  332. return DoPost($LocalNamesPage);
  333. } else {
  334. print GetHeader('', T('Define')),
  335. $q->start_div({-class=>'content define'}),
  336. GetFormStart(undef, 'get', 'def');
  337. my $go = T('Go!');
  338. print $q->p($q->label({-for=>"defined"}, T('Name:') . ' '),
  339. $q->textfield(-name=>"name", -id=>"defined",
  340. -tabindex=>"1", -size=>20));
  341. print $q->p($q->label({-for=>"definition"}, T('URL:') . ' '),
  342. $q->textfield(-name=>"link", -id=>"definition",
  343. -tabindex=>"2", -size=>20));
  344. print $q->p($q->submit(-label=>$go, -tabindex=>"3"),
  345. GetHiddenValue('action', 'define'),
  346. GetHiddenValue('recent_edit', 'on'));
  347. print $q->end_form, $q->end_div();
  348. PrintFooter();
  349. }
  350. }
  351. push(@MyAdminCode, sub {
  352. my ($id, $menuref, $restref) = @_;
  353. push(@$menuref, ScriptLink('action=define', T('Define Local Names'),
  354. 'define'));
  355. });
  356. # link to define action for non-existing pages
  357. push(@MyFooters, \&GetWantedPages);
  358. sub GetWantedPages {
  359. # skip admin pages
  360. foreach my $id (@UserGotoBarPages, keys %AdminPages) {
  361. delete $WantedPages{$id};
  362. }
  363. # skip comment pages
  364. if ($CommentsPrefix) {
  365. foreach my $id (keys %WantedPages) {
  366. delete $WantedPages{$id} if $id =~ /^$CommentsPrefix/; # TODO use $CommentsPattern ?
  367. }
  368. }
  369. # now something more complicated: if near-links.pl was loaded, then
  370. # %WantedPages may contain pages that will in fact resolve. That's
  371. # why we try to resolve all the wanted ids again. And since
  372. # resolving ids will do stuff to %WantedPages, we need to make a
  373. # copy of the ids we're looking at.
  374. my @wanted;
  375. foreach my $id (keys %WantedPages) {
  376. my ($class, $resolved) = ResolveId($id);
  377. push(@wanted, $id) unless $resolved;
  378. }
  379. # if any wanted pages remain, print them
  380. if (@wanted) {
  381. return $q->div({-class=>'definition'},
  382. $q->p(T('Define external redirect:'), ' ',
  383. map { my $page = NormalToFree($_);
  384. ScriptLink('action=define;name='
  385. . UrlEncode($page),
  386. $page,
  387. 'define');
  388. } @wanted));
  389. }
  390. return '';
  391. }