near-links.pl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. # Copyright (C) 2003–2013 Alex Schroeder <alex@gnu.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. use strict;
  16. use v5.10;
  17. AddModuleDescription('near-links.pl', 'Near Links');
  18. our ($q, %AdminPages, %InterSite, $CommentsPrefix, $DataDir, $UseCache, @MyFooters, @MyMaintenance, @MyInitVariables, @Debugging, $InterSitePattern, @UserGotoBarPages, @IndexOptions);
  19. =head1 Near Links
  20. URL abbreviations facilitate linking to other wikis. For example, if
  21. you define the abbreviation Community, you might link to
  22. Community:WhyWikiWorks. Near Links takes this even further: If you use
  23. an ordinary link to WhyWikiWorks, this will link to
  24. Community:WhyWikiWorks if there is no local WhyWikiWorks page.
  25. =cut
  26. our (%NearSite, %NearSource, %NearLinksUsed, $NearDir, $NearMap,
  27. %NearSearch, $SisterSiteLogoUrl, %NearLinksException);
  28. =head2 Options
  29. =cut
  30. $NearMap = 'NearMap';
  31. $NearDir = "$DataDir/near"; # for page indexes and .png files of other sites
  32. =head2 Initialization
  33. There are several steps required before Near Links work as expected.
  34. You must have an B<InterMap> page to define URL abbreviations. There,
  35. you associate a prefix with a partial URL on every line that starts
  36. with a space:
  37. Community http://www.communitywiki.org/en/
  38. You must have a B<NearMap> page. There, you associate some of the
  39. prefixes defined on the InterMap page with an URL that tells Oddmuse
  40. how to retrieve the list of all pages from the site. Again, only lines
  41. starting with a single space are considered, allowing you to mix
  42. explanatory paragraphs with data.
  43. Community http://www.communitywiki.org/en?action=index;raw=1
  44. Remember to use the same key as on the InterMap page!
  45. There is an optional third item you can place on that line telling
  46. Oddmuse how to forward searches to the remote site.
  47. You must run B<Maintenance> once. At the end of the maintenance output
  48. you should see a line for every prefix on your NearMap telling you
  49. that the list of pages is being retrieved: "Getting page index file
  50. for Community." This page index file is created in the C<near>
  51. directory in your data directory.
  52. You can change the name of the NearMap page by setting the C<$NearMap>
  53. option:
  54. $NearMap = 'Local_Near_Map';
  55. You can change the directory used for caching the remote page index
  56. files by setting the C<$NearDir> option:
  57. $NearDir = '/var/oddmuse/near';
  58. =cut
  59. push(@MyInitVariables, \&NearLinksInit);
  60. sub NearLinksInit {
  61. $NearMap = FreeToNormal($NearMap); # just making sure
  62. $AdminPages{$NearMap} = 1; # list it on the admin page
  63. %NearLinksUsed = (); # list of links used during this request
  64. %NearSite = ();
  65. %NearSearch = ();
  66. %NearSource = ();
  67. # Don't overwrite the values other modules might have set
  68. $NearLinksException{rc} = 1;
  69. $NearLinksException{rss} = 1;
  70. foreach (split(/\n/, GetPageContent($NearMap))) {
  71. if (/^ ($InterSitePattern)[ \t]+([^ ]+)(?:[ \t]+([^ ]+))?$/) {
  72. my ($site, $url, $search) = ($1, $2, $3);
  73. next unless $InterSite{$site};
  74. $NearSite{$site} = $url;
  75. $NearSearch{$site} = $search if $search;
  76. my ($status, $data) = ReadFile("$NearDir/$site");
  77. next unless $status;
  78. foreach my $page (split(/\n/, $data)) {
  79. push(@{$NearSource{$page}}, $site);
  80. }
  81. }
  82. }
  83. }
  84. =head2 Maintenance
  85. C<NearLinksMaintenance> is added to C<@MyMaintenance> in order to
  86. download all page indexes from the remote sites defined on the NearMap
  87. page. The download is skipped if the existing page index for the
  88. remote site is less than twelve hours old. If you want to force this,
  89. you need to delete the page indexes in the cache directory. Look for
  90. the C<near> directory in your data directory, unless you set
  91. C<$NearDir>.
  92. =cut
  93. push(@MyMaintenance, \&NearLinksMaintenance);
  94. sub NearLinksMaintenance {
  95. if (%NearSite) {
  96. CreateDir($NearDir);
  97. # skip if less than 12h old and caching allowed (the default)
  98. foreach my $site (keys %NearSite) {
  99. next if GetParam('cache', $UseCache) > 0
  100. and -f "$NearDir/$site" and -M "$NearDir/$site" < 0.5;
  101. print $q->p(Ts('Getting page index file for %s.', $site));
  102. my $data = GetRaw($NearSite{$site});
  103. print $q->p($q->strong(Ts('%s returned no data, or LWP::UserAgent is not available.',
  104. $q->a({-href=>$NearSite{$site}},
  105. $NearSite{$site})))) unless $data;
  106. WriteStringToFile("$NearDir/$site", $data);
  107. }
  108. }
  109. }
  110. =head2 Debugging Initialization
  111. You can use the Debug Action to list the Near Links found in addition
  112. to the Inter Links:
  113. http://localhost/cgi-bin/wiki?action=debug
  114. If there are no Near Links defined, check the initialization
  115. requirements listed above.
  116. =cut
  117. push(@Debugging, \&DoNearLinksList);
  118. sub DoNearLinksList {
  119. print $q->h2(T('Near links:')),
  120. $q->p(join('; ',
  121. map { GetPageLink($_) . ': ' . join(', ', @{$NearSource{$_}})}
  122. sort keys %NearSource));
  123. }
  124. =head2 Name Resolution
  125. We want Near Links only to have an effect for pages that do not exist
  126. locally. It should not take precedence! Thus, we hook into
  127. C<ResolveId>; this function returns a list of four elements: CSS
  128. class, resolved id, title (eg. for popups), and a boolean saying
  129. whether the page exists or not. If the second element is empty, then
  130. no page exists and we check C<%NearSource> for a match. C<%NearSource>
  131. uses the page id as a key and a list of sites as the value. We just
  132. pick the first site on the list and return it as an URL (and using the
  133. CSS class "near").
  134. The pages explicitly excluded from being Near Links are the ones most
  135. likely to confuse first users: All the pages on C<@UserGotoBarPages>
  136. and all pages in C<%AdminPages>.
  137. =cut
  138. *OldNearLinksResolveId = \&ResolveId;
  139. *ResolveId = \&NewNearLinksResolveId;
  140. sub NewNearLinksResolveId {
  141. my $id = shift;
  142. my @result = OldNearLinksResolveId($id, @_);
  143. my %forbidden = map { $_ => 1 } @UserGotoBarPages, %AdminPages;
  144. $forbidden{$id} = 1 if $CommentsPrefix and $id =~ /^$CommentsPrefix/;
  145. if (not $result[1] and $NearSource{$id} and not $forbidden{$id}) {
  146. $NearLinksUsed{$id} = 1;
  147. my $site = $NearSource{$id}[0];
  148. return ('near', GetInterSiteUrl($site, $id), $site); # return source as title attribute
  149. } else {
  150. return @result;
  151. }
  152. }
  153. =head2 Search
  154. This module allows you to send search terms to remote sites. You need
  155. to tell Oddmuse how to do this, however. On the NearMap, this is what
  156. you do: Lines starting with a single space and an URL abbreviation
  157. (defined on the InterMap), an URL that retrieves the list of pages on
  158. the remote site, and an URL that runs a search and returns the result
  159. in RSS 3.0 format.
  160. Here's an example. Remember that this should all go on a single line
  161. starting with a single space:
  162. Community http://www.communitywiki.org/cw?action=index;raw=1
  163. http://www.communitywiki.org/cw?search=%s;raw=1;near=0
  164. If you want to know more about the RSS 3.0 format, take a look at the
  165. specification: L<http://www.aaronsw.com/2002/rss30>.
  166. The effect is that when you search something, the search will be a
  167. local search. There will be a link called "Search sites on the NearMap
  168. as well" at the beginning of your local search results. Clicking on
  169. the link will include search results from remote sites as well.
  170. =head3 Development
  171. C<%NearLinksException> is used to store all the actions where a search
  172. should not result in the printing of pages. Theoretically we could use
  173. the presence of the C<$func> parameter in the call to
  174. C<SearchTitleAndBody>, but there are two problems with this approach:
  175. We don't know what the code does. It might just be collecting data
  176. without printing anything. And even if we did, and skipped the
  177. printing, we'd be searching the near pages in vain in the case of
  178. RecentChanges and RSS feeds, since the near pages have no change date
  179. and therefore can never be presented chronologically. Developer input
  180. will be necessary in all cases.
  181. =cut
  182. *OldNearLinksSearchMenu = \&SearchMenu;
  183. *SearchMenu = \&NewNearLinksSearchMenu;
  184. sub NewNearLinksSearchMenu {
  185. my $string = shift;
  186. my $result = OldNearLinksSearchMenu($string);
  187. $result .= ' ' . ScriptLink('near=2;search=' . UrlEncode($string),
  188. Ts('Search sites on the %s as well', $NearMap))
  189. if %NearSearch and GetParam('near', 1) < 2;
  190. return $result;
  191. }
  192. *OldNearLinksSearchTitleAndBody = \&SearchTitleAndBody;
  193. *SearchTitleAndBody = \&NewNearLinksSearchTitleAndBody;
  194. sub NewNearLinksSearchTitleAndBody {
  195. my $string = shift;
  196. my @result = OldNearLinksSearchTitleAndBody($string, @_);
  197. my $action = GetParam('action', 'browse');
  198. @result = SearchNearPages($string, @result)
  199. if GetParam('near', 1) and not $NearLinksException{$action};
  200. return @result;
  201. }
  202. sub SearchNearPages {
  203. my $string = shift;
  204. my %found = map {$_ => 1} @_;
  205. my $regex = SearchRegexp($string);
  206. if (%NearSearch and GetParam('near', 1) > 1 and GetParam('context',1)) {
  207. foreach my $site (keys %NearSearch) {
  208. my $url = $NearSearch{$site};
  209. $url =~ s/\%s/UrlEncode($string)/eg or $url .= UrlEncode($string);
  210. print $q->hr(), $q->p(Ts('Fetching results from %s:', $q->a({-href=>$url}, $site)))
  211. unless GetParam('raw', 0);
  212. my $data = GetRaw($url);
  213. my @entries = split(/\n\n+/, $data);
  214. shift @entries; # skip head
  215. foreach my $entry (@entries) {
  216. my $entryPage = ParseData($entry); # need to pass reference
  217. my $name = $entryPage->{title};
  218. next if $found{$name}; # do not duplicate local pages
  219. $found{$name} = 1;
  220. PrintSearchResultEntry($entryPage, $regex); # with context and full search!
  221. }
  222. }
  223. }
  224. if (%NearSource and (GetParam('near', 1) or GetParam('context',1) == 0)) {
  225. my $intro = 0;
  226. foreach my $name (sort keys %NearSource) {
  227. next if $found{$name}; # do not duplicate local pages
  228. if (SearchString($string, NormalToFree($name))) {
  229. $found{$name} = 1;
  230. print $q->hr() . $q->p(T('Near pages:')) unless GetParam('raw', 0) or $intro;
  231. $intro = 1;
  232. PrintPage($name); # without context!
  233. }
  234. }
  235. }
  236. return keys(%found);
  237. }
  238. =head2 Index Of All Pages
  239. The index of all pages will offer a new option called "Include near
  240. pages". This uses the C<near> parameter. Example:
  241. C<http://localhost/cgi-bin/wiki?action=index;near=1>.
  242. We don't need to list remote pages that also exist locally, since the
  243. index will resolve pages as they get printed. If we list remote pages,
  244. all we'll do is have the name twice on the list, and they'll get
  245. resolved to the same target (the local page), which is unexpected.
  246. =cut
  247. # IndexOptions must be set in MyInitVariables for translations to
  248. # work.
  249. push(@MyInitVariables, sub {
  250. push(@IndexOptions, ['near', T('Include near pages'), 0,
  251. \&ListNearPages]);
  252. });
  253. sub ListNearPages {
  254. my %pages = %NearSource;
  255. if (GetParam('pages', 1)) {
  256. foreach my $page (AllPagesList()) {
  257. delete $pages{$page};
  258. }
  259. }
  260. return keys %pages;
  261. }
  262. =head2 Defining Near Linked Pages
  263. When Oddmuse links to a remote site via NearLinks, it is difficult to
  264. create a local copy of the page. After all, there is no edit link.
  265. That's why the appropriate edit links will be presented at the bottom
  266. of the page. This list will be prefixed with a link to the page called
  267. B<EditNearLinks>. This allows you to explain what's going on to your
  268. users.
  269. To change the name of this page, use translation:
  270. C<$Translate{EditNearLinks}='Define these pages locally';>
  271. These edit links for local pages are inside a div with the class
  272. "near".
  273. =cut
  274. push(@MyFooters, \&GetNearLinksUsed);
  275. sub GetNearLinksUsed {
  276. if (%NearLinksUsed) {
  277. return $q->div({-class=>'near'},
  278. $q->p(GetPageLink(T('EditNearLinks')) . ':',
  279. map { GetEditLink($_, $_); } keys %NearLinksUsed));
  280. }
  281. return '';
  282. }
  283. =head2 Twin Pages
  284. When looking at local pages that also exist on remote sites, Oddmuse
  285. will add links to the various remote versions at the bottom of the
  286. page. (These remote sites are sometimes also referred to as "sister
  287. sites".)
  288. You will need logos for these remote sites. You specify where the
  289. logos are to be found by setting C<$SisterSiteLogoUrl>. The C<%s> will
  290. be replaced by the URL abbreviation used.
  291. Example:
  292. $SisterSiteLogoUrl = "http://www.emacswiki.org/pics/%s.png";
  293. For the Community:WhyWikiWorks page, this will result in the URL
  294. L<http://www.emacswiki.org/pics/Community.png>.
  295. These logos for twin pages are inside a div with the class "sister".
  296. =cut
  297. push(@MyFooters, \&GetSisterSites);
  298. $SisterSiteLogoUrl = 'file:///tmp/oddmuse/%s.png';
  299. sub GetSisterSites {
  300. my $id = shift;
  301. if ($id and $NearSource{$id}) {
  302. my $sistersites = T('The same page on other sites:') . $q->br();
  303. foreach my $site (@{$NearSource{$id}}) {
  304. my $logo = $SisterSiteLogoUrl;
  305. $logo =~ s/\%s/$site/g;
  306. $sistersites .= $q->a({-href=>GetInterSiteUrl($site, $id),
  307. -title=>"$site:$id"},
  308. $q->img({-src=>$logo,
  309. -alt=>"$site:$id"}));
  310. }
  311. return $q->div({-class=>'sister'}, $q->p($sistersites));
  312. }
  313. return '';
  314. }