namespaces.pl 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. # Copyright (C) 2004–2022 Alex Schroeder <alex@gnu.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify it under
  4. # the terms of the GNU General Public License as published by the Free Software
  5. # Foundation; either version 3 of the License, or (at your option) any later
  6. # version.
  7. #
  8. # This program is distributed in the hope that it will be useful, but WITHOUT
  9. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  11. #
  12. # You should have received a copy of the GNU General Public License along with
  13. # this program. If not, see <http://www.gnu.org/licenses/>.
  14. use strict;
  15. use v5.10;
  16. =head1 Namespaces Extension
  17. This module allows you to create namespaces in Oddmuse. The effect is
  18. that C<http://localhost/cgi-bin/wiki/Claudia/HomePage> and
  19. C<http://localhost/cgi-bin/wiki/Alex/HomePage> are two different
  20. pages. The first URL points to the C<HomePage> in the C<Claudia>
  21. namespace, the second URL points to the C<HomePage> in the C<Alex>
  22. namespace. Both namespaces have their own list of pages and their own
  23. list of changes, and so on.
  24. C<http://localhost/cgi-bin/wiki/HomePage> points to the C<HomePage> in
  25. the main namespace. It is usually named C<Main>. The name can be
  26. changed using the C<$NamespacesMain> option.
  27. URL abbreviations will automatically be created for you. Thus, you can
  28. link to the various pages using C<Claudia:HomePage>, C<Alex:HomePage>,
  29. and C<Main:HomePage>. An additional abbreviation is also created
  30. automatically: C<Self>. You can use it to link to actions such as
  31. C<Self:action=index>. The name of this self-referring abbreviation can
  32. be changed using the C<$NamespacesSelf> option.
  33. =cut
  34. AddModuleDescription('namespaces.pl', 'Namespaces Extension');
  35. use File::Glob ':glob';
  36. our ($q, %Action, %Page, @IndexList, $Now, %InterSite, $SiteName, $ScriptName,
  37. $UsePathInfo, $DataDir, $HomePage, @MyInitVariables, @MyAdminCode, $FullUrl,
  38. $LinkPattern, $InterSitePattern, $FreeLinks, $FreeLinkPattern,
  39. $InterLinkPattern, $FreeInterLinkPattern, $UrlProtocols, $WikiLinks, $FS,
  40. $BannedContent, $BannedHosts, $RcFile, $RcOldFile, $RcDefault, $PageDir,
  41. $KeepDir, $LockDir, $TempDir, $IndexFile, $VisitorFile, $NoEditFile,
  42. $WikiDescription, $LastUpdate, $StaticDir, $StaticUrl, $InterWikiMoniker,
  43. $RefererDir, $PermanentAnchorsFile, @IndexList, %IndexHash);
  44. our ($NamespacesMain, $NamespacesSelf, $NamespaceCurrent,
  45. $NamespaceRoot, $NamespaceSlashing, @NamespaceParameters,
  46. %Namespaces, $NamespacesRootDataDir);
  47. $NamespacesMain = 'Main'; # to get back to the main namespace
  48. $NamespacesSelf = 'Self'; # for your own namespace
  49. $NamespaceCurrent = ''; # the current namespace, if any
  50. $NamespaceRoot = ''; # the original $ScriptName
  51. =head2 Configuration
  52. The option C<@NamespaceParameters> can be used by programmers to
  53. indicate for which parameters the last element of path_info shall
  54. count as a namespace. Consider these examples:
  55. http://example.org/wiki/Foo/Bar
  56. http://example.org/wiki/Foo?action=browse;id=Bar
  57. http://example.org/wiki/Foo?title=Bar;text=Baz
  58. http://example.org/wiki/Foo?search=bar
  59. In all the listed cases, Foo is supposed to be the namespace.
  60. In the following cases, however, we're interested in the page Foo and
  61. not the namespace Foo.
  62. http://example.org/wiki/Foo?username=bar
  63. =cut
  64. @NamespaceParameters = qw(action search title match);
  65. $NamespaceSlashing = 0; # affects : decoding NamespaceRcLines
  66. # try to do it before any other module starts meddling with the
  67. # variables (eg. localnames.pl)
  68. unshift(@MyInitVariables, \&NamespacesInitVariables);
  69. sub GetNamespace {
  70. my $ns = GetParam('ns', '');
  71. if (not $ns and $UsePathInfo) {
  72. my $path_info = decode_utf8($q->path_info());
  73. # make sure ordinary page names are not matched!
  74. if ($path_info =~ m|^/($InterSitePattern)(/.*)?|
  75. and ($2 or $q->keywords or NamespaceRequiredByParameter())) {
  76. $ns = $1;
  77. }
  78. }
  79. ReportError(Ts('%s is not a legal name for a namespace', $ns))
  80. if $ns and $ns !~ m/^($InterSitePattern)$/;
  81. return $ns;
  82. }
  83. sub NamespacesInitVariables {
  84. %Namespaces = ();
  85. # Do this before changing the $DataDir and $ScriptName
  86. if ($UsePathInfo) {
  87. $Namespaces{$NamespacesMain} = $ScriptName . '/';
  88. foreach my $name (Glob("$DataDir/*")) {
  89. if (IsDir($name)
  90. and $name =~ m|/($InterSitePattern)$|
  91. and $name ne $NamespacesMain
  92. and $name ne $NamespacesSelf) {
  93. $Namespaces{$1} = $ScriptName . '/' . $1 . '/';
  94. }
  95. }
  96. }
  97. $NamespaceRoot = $ScriptName; # $ScriptName may be changed below
  98. $NamespacesRootDataDir = $DataDir; # $DataDir may be chanegd below
  99. $NamespaceCurrent = '';
  100. my $ns = GetNamespace();
  101. if ($ns
  102. and $ns ne $NamespacesMain
  103. and $ns ne $NamespacesSelf) {
  104. $NamespaceCurrent = $ns;
  105. # Change some stuff from the original InitVariables call:
  106. $SiteName .= ' ' . NormalToFree($NamespaceCurrent);
  107. $InterWikiMoniker = $NamespaceCurrent;
  108. $DataDir .= '/' . $NamespaceCurrent;
  109. $PageDir = "$DataDir/page";
  110. $KeepDir = "$DataDir/keep";
  111. $RefererDir = "$DataDir/referer";
  112. $TempDir = "$DataDir/temp";
  113. $LockDir = "$TempDir/lock";
  114. $NoEditFile = "$DataDir/noedit";
  115. $RcFile = "$DataDir/rc.log";
  116. $RcOldFile = "$DataDir/oldrc.log";
  117. $IndexFile = "$DataDir/pageidx";
  118. $VisitorFile = "$DataDir/visitors.log";
  119. $PermanentAnchorsFile = "$DataDir/permanentanchors";
  120. # $ConfigFile -- shared
  121. # $ModuleDir -- shared
  122. # $NearDir -- shared
  123. $ScriptName .= '/' . UrlEncode($NamespaceCurrent);
  124. $FullUrl .= '/' . UrlEncode($NamespaceCurrent);
  125. $StaticDir .= '/' . $NamespaceCurrent; # from static-copy.pl
  126. $StaticUrl .= UrlEncode($NamespaceCurrent) . '/'
  127. if substr($StaticUrl,-1) eq '/'; # from static-copy.pl
  128. $WikiDescription .= "<p>Current namespace: $NamespaceCurrent</p>";
  129. $LastUpdate = Modified($IndexFile);
  130. CreateDir($DataDir);
  131. }
  132. $Namespaces{$NamespacesSelf} = $ScriptName . '?';
  133. # reinitialize
  134. @IndexList = ();
  135. ReInit();
  136. # transfer list of sites
  137. foreach my $key (keys %Namespaces) {
  138. $InterSite{$key} = $Namespaces{$key} unless $InterSite{$key};
  139. }
  140. # remove the artificial ones
  141. delete $Namespaces{$NamespacesMain};
  142. delete $Namespaces{$NamespacesSelf};
  143. }
  144. sub NamespaceRequiredByParameter {
  145. foreach my $key (@NamespaceParameters) {
  146. return 1 if $q->param($key);
  147. }
  148. }
  149. =head Spam fighting
  150. We want to share C<BannedContent> and C<BannedHosts> between all the wiki
  151. namespaces. Therefore, we need to handle a number of cases:
  152. C<UserIsBanned> uses C<GetPageContent($BannedHosts)> and C<BannedContent> uses
  153. C<GetPageContent($BannedContent)>, therefore C<GetPageContent> is going to get
  154. modified.
  155. C<DoBanHosts> in F<ban-contributors.pl> uses C<DoPost($BannedContent)> and
  156. C<DoPost($BannedHosts)>, therefore C<DoPost> is going to get modified.
  157. =cut
  158. *OldNamespaceGetPageContent = \&GetPageContent;
  159. *GetPageContent = \&NewNamespaceGetPageContent;
  160. sub NewNamespaceGetPageContent {
  161. my ($id) = @_;
  162. if ($NamespaceCurrent and ($id eq $BannedContent or $id eq $BannedHosts)) {
  163. local $PageDir = "$NamespacesRootDataDir/page";
  164. # we cannot use ReadFileOrDie because our $IndexHash{$id} does not reflect the existence of the root file
  165. my ($status, $data) = ReadFile(GetPageFile($id));
  166. return ParseData($data)->{text} if $status;
  167. return '';
  168. }
  169. return OldNamespaceGetPageContent(@_);
  170. }
  171. *OldNamespaceDoPost = \&DoPost;
  172. *DoPost = \&NewNamespaceDoPost;
  173. sub NewNamespaceDoPost {
  174. my ($id) = @_;
  175. if ($NamespaceCurrent and ($id eq $BannedContent or $id eq $BannedHosts)) {
  176. local $DataDir = $NamespacesRootDataDir;
  177. local $PageDir = "$DataDir/page";
  178. local $KeepDir = "$DataDir/keep";
  179. local $LockDir = "$TempDir/lock";
  180. local $NoEditFile = "$DataDir/noedit";
  181. local $RcFile = "$DataDir/rc.log";
  182. local $RcOldFile = "$DataDir/oldrc.log";
  183. local $IndexFile = "$DataDir/pageidx";
  184. @IndexList = %IndexHash = ();
  185. AllPagesList(); # reload from new pageidx
  186. return OldNamespaceDoPost(@_);
  187. }
  188. return OldNamespaceDoPost(@_);
  189. }
  190. =head2 RecentChanges
  191. RecentChanges in the main namespace will list changes to all the
  192. namespaces. In order to limit it to the changes in the main namespace
  193. itself, you need to use the local=1 parameter. Example:
  194. C<http://localhost/cgi-bin/wiki?action=rc;local=1>
  195. First we need to read all the C<rc.log> files from the various
  196. namespace directories. If the first entry in the log file is not old
  197. enough, we need to prepend the C<oldrc.log> file.
  198. The tricky part is how to introduce the namespace prefixes to the
  199. links to be printed without copying the whole machinery. All the new
  200. lines belong to a namespace. Prefix every pagename with the namespace
  201. and a colon, ie. C<Alex:HomePage>. This provides
  202. C<NewNamespaceScriptUrl> with the necessary information to build the
  203. correct URL to link to.
  204. =cut
  205. *OldNamespaceGetRcLines = \&GetRcLines;
  206. *GetRcLines = \&NewNamespaceGetRcLines;
  207. sub NewNamespaceGetRcLines { # starttime, hash of seen pages to use as a second return value
  208. my $starttime = shift || GetParam('from', 0) ||
  209. $Now - GetParam('days', $RcDefault) * 86400; # 24*60*60
  210. my $filterOnly = GetParam('rcfilteronly', '');
  211. # these variables apply accross logfiles
  212. my %match = $filterOnly ? map { $_ => 1 } SearchTitleAndBody($filterOnly) : ();
  213. my %following = ();
  214. my @result = ();
  215. # Get the list of rc.log and oldrc.log files we need; rcoldfiles is
  216. # a mapping from rcfiles to rcoldfiles.
  217. my @rcfiles = ();
  218. my %rcoldfiles = ();
  219. my %namespaces = ();
  220. if ($NamespaceCurrent or GetParam('local', 0)) {
  221. push(@rcfiles, $RcFile);
  222. $rcoldfiles{$RcFile} = $RcOldFile;
  223. } else {
  224. push(@rcfiles, $RcFile);
  225. $rcoldfiles{$RcFile} = $RcOldFile;
  226. # Get the namespaces from the intermap instead of parsing the
  227. # directory. This reduces the chances of getting different
  228. # results.
  229. foreach my $site (keys %InterSite) {
  230. if (substr($InterSite{$site}, 0, length($ScriptName)) eq $ScriptName) {
  231. my $ns = $site;
  232. my $file = "$DataDir/$ns/rc.log";
  233. push(@rcfiles, $file);
  234. $namespaces{$file} = $ns;
  235. $rcoldfiles{$file} = "$DataDir/$ns/oldrc.log";
  236. }
  237. }
  238. }
  239. # Now each rcfile and the matching rcoldfile if required. When
  240. # opening a rcfile, compare the first timestamp with the
  241. # starttime. If any rcfile exists with no timestamp before the
  242. # starttime, we need to open its rcoldfile.
  243. foreach my $rcfile (@rcfiles) {
  244. open(my $F, '<:encoding(UTF-8)', encode_utf8($rcfile));
  245. my $line = <$F>;
  246. my ($ts) = split(/$FS/, $line); # the first timestamp in the regular rc file
  247. my @new;
  248. if (not $ts or $ts > $starttime) { # we need to read the old rc file, too
  249. push(@new, GetRcLinesFor($rcoldfiles{$rcfile}, $starttime,\%match, \%following));
  250. }
  251. push(@new, GetRcLinesFor($rcfile, $starttime, \%match, \%following));
  252. # strip rollbacks in each namespace separately
  253. @new = StripRollbacks(@new);
  254. # prepend the namespace to both pagename and author
  255. my $ns = $namespaces{$rcfile};
  256. if ($ns) {
  257. for (my $i = $#new; $i >= 0; $i--) {
  258. # page id
  259. $new[$i][1] = $ns . ':' . $new[$i][1];
  260. # username
  261. $new[$i][5] = $ns . ':' . $new[$i][5];
  262. }
  263. }
  264. push(@result, @new);
  265. }
  266. # We need to resort these lines... <=> forces numerical comparison
  267. # which is just what we need here, as the timestamp is the first
  268. # part of the line.
  269. @result = sort { $a->[0] <=> $b->[0] } @result;
  270. # check the first timestamp in the default file, maybe read old log file
  271. # GetRcLinesFor is trying to save memory space, but some operations
  272. # can only happen once we have all the data.
  273. return LatestChanges(@result);
  274. }
  275. =head2 RSS feed
  276. When retrieving the RSS feed with the parameter full=1, one would
  277. expect the various items to contain the fully rendered HTML.
  278. Unfortunately, this is not so, the reason being that OpenPage tries to
  279. open a file for id C<Test:Foo> which does not exist. Now, just
  280. fiddling with OpenPage will not work, because when rendering a page
  281. within a particular namespace, we need a separate C<%IndexHash> to
  282. figure out which links will actually point to existing pages and which
  283. will not. In fact, we need something alike the code for
  284. C<NamespacesInitVariables> to run. To do this elegantly would require
  285. us to create some sort of context, and cache it, and restore the
  286. default when we're done. All of this would be complicated and brittle.
  287. Until then, the parameter full=1 just is not supported.
  288. =head2 Encoding pagenames
  289. C<NewNamespaceUrlEncode> uses C<UrlEncode> to encode pagenames, with
  290. one exception. If the local variable C<$NamespaceSlashing> has been
  291. set, the first encoded slash is converted back into an ordinary slash.
  292. This should preserve the slash added between namespace and pagename.
  293. =cut
  294. *OldNamespaceUrlEncode = \&UrlEncode;
  295. *UrlEncode = \&NewNamespaceUrlEncode;
  296. sub NewNamespaceUrlEncode {
  297. my $result = OldNamespaceUrlEncode(@_);
  298. $result =~ s/\%2f/\// if $NamespaceSlashing; # just one should be enough
  299. return $result;
  300. }
  301. =head2 Printing Links
  302. We also need to override C<ScriptUrl>. This is done by
  303. C<NewNamespaceScriptUrl>. This is where the slash in the pagename is
  304. used to build a new URL pointing to the appropriate page in the
  305. appropriate namespace.
  306. In addition to that, this function makes sure that backlinks to edit
  307. pages with redirections result in an appropriate URL.
  308. This is used for ordinary page viewing and RecentChanges.
  309. =cut
  310. *OldNamespaceScriptUrl = \&ScriptUrl;
  311. *ScriptUrl = \&NewNamespaceScriptUrl;
  312. sub NewNamespaceScriptUrl {
  313. my ($action, @rest) = @_;
  314. local $ScriptName = $ScriptName;
  315. if ($action =~ /^($UrlProtocols)\%3a/) { # URL-encoded URL
  316. # do nothing (why do we need this?)
  317. } elsif ($action =~ m!(.*?)([^/?&;=]+)%3a(.*)!) {
  318. # $2 is supposed to match the $InterSitePattern, but it might be
  319. # UrlEncoded in Main:RecentChanges. If $2 contains Umlauts, for
  320. # example, the encoded $2 will no longer match $InterSitePattern.
  321. # We have a likely candidate -- now perform an additional test.
  322. my ($s1, $s2, $s3) = ($1, $2, $3);
  323. my $s = UrlDecode($s2);
  324. if ($s =~ /^$InterSitePattern$/) {
  325. if ("$s2:$s3" eq GetParam('oldid', '')) {
  326. if ($s2 eq $NamespacesMain) {
  327. $ScriptName = $NamespaceRoot;
  328. } else {
  329. $ScriptName = $NamespaceRoot . '/' . $s2;
  330. }
  331. } else {
  332. $ScriptName .= '/' . $s2;
  333. }
  334. $action = $s1 . $s3;
  335. }
  336. }
  337. return OldNamespaceScriptUrl($action, @rest);
  338. }
  339. =head2 Invalid Pagenames
  340. Since the adding of a namespace and colon makes all these new
  341. pagenames invalid, C<NamespaceValidId> is overridden with an empty
  342. function called C<NamespaceValidId> while C<NewNamespaceDoRc> is
  343. running. This is important so that author links are printed.
  344. =cut
  345. *OldNamespaceGetAuthorLink = \&GetAuthorLink;
  346. *GetAuthorLink = \&NewNamespaceGetAuthorLink;
  347. sub NewNamespaceGetAuthorLink {
  348. local *OldNamespaceValidId = \&ValidId;
  349. local *ValidId = \&NewNamespaceValidId;
  350. # local $NamespaceSlashing = 1;
  351. return OldNamespaceGetAuthorLink(@_);
  352. }
  353. sub NewNamespaceValidId {
  354. local $FreeLinkPattern = "($InterSitePattern:)?$FreeLinkPattern";
  355. local $LinkPattern = "($InterSitePattern:)?$LinkPattern";
  356. return OldNamespaceValidId(@_);
  357. }
  358. =head2 Redirection User Interface
  359. When redirection form page A to B, you will never see the link "Edit
  360. this page" at the bottom of page A. Therefore Oddmuse adds a link at
  361. the top of page B (if you arrived there via a redirection), linking to
  362. the edit page for A. C<NewNamespaceBrowsePage> has the necessary code
  363. to make this work for redirections between namespaces. This involves
  364. passing namespace and pagename via the C<oldid> parameter to the next
  365. script invokation, where C<ScriptUrl> will be used to create the
  366. appropriate link. This is where C<NewNamespaceScriptUrl> comes into
  367. play.
  368. =cut
  369. *OldNamespaceBrowsePage = \&BrowsePage;
  370. *BrowsePage = \&NewNamespaceBrowsePage;
  371. sub NewNamespaceBrowsePage {
  372. #REDIRECT into different namespaces
  373. my ($id, $raw, $comment, $status) = @_;
  374. OpenPage($id);
  375. my ($revisionPage, $revision) = GetTextRevision(GetParam('revision', ''), 1);
  376. my $text = $revisionPage->{text};
  377. my $oldId = GetParam('oldid', '');
  378. if (not $oldId and not $revision and (substr($text, 0, 10) eq '#REDIRECT ')
  379. and (($WikiLinks and $text =~ /^\#REDIRECT\s+(($InterSitePattern:)?$InterLinkPattern)/)
  380. or ($FreeLinks and $text =~ /^\#REDIRECT\s+\[\[(($InterSitePattern:)?$FreeInterLinkPattern)\]\]/))) {
  381. my ($ns, $page) = map { UrlEncode($_) } split(/:/, FreeToNormal($1));
  382. $oldId = ($NamespaceCurrent || $NamespacesMain) . ':' . $id;
  383. local $ScriptName = $NamespaceRoot || $ScriptName;
  384. print GetRedirectPage("action=browse;ns=$ns;oldid=$oldId;id=$page", $id);
  385. } else {
  386. return OldNamespaceBrowsePage(@_);
  387. }
  388. }
  389. =head2 List Namespaces
  390. The namespaces action will link all known namespaces.
  391. =cut
  392. $Action{namespaces} = \&DoNamespacesList;
  393. sub DoNamespacesList {
  394. if (GetParam('raw', 0)) {
  395. print GetHttpHeader('text/plain');
  396. print join("\n", sort keys %Namespaces), "\n";
  397. } else {
  398. print GetHeader('', T('Namespaces')),
  399. $q->start_div({-class=>'content namespaces'}),
  400. GetFormStart(undef, 'get'), GetHiddenValue('action', 'browse'),
  401. GetHiddenValue('id', $HomePage);
  402. my $new = $q->textfield('ns') . ' ' . $q->submit('donamespace', T('Go!'));
  403. print $q->ul($q->li([map { $q->a({-href => $Namespaces{$_} . $HomePage},
  404. $_); } sort keys %Namespaces]), $q->li($new));
  405. print $q->end_form() . $q->end_div();
  406. PrintFooter();
  407. }
  408. }
  409. push(@MyAdminCode, \&NamespacesMenu);
  410. sub NamespacesMenu {
  411. my ($id, $menuref, $restref) = @_;
  412. push(@$menuref,
  413. ScriptLink('action=namespaces',
  414. T('Namespaces'),
  415. 'namespaces'));
  416. }
  417. *NamespacesOldGetId = \&GetId;
  418. *GetId = \&NamespacesNewGetId;
  419. sub NamespacesNewGetId {
  420. my $id = NamespacesOldGetId(@_);
  421. # http://example.org/cgi-bin/wiki.pl?action=browse;ns=Test;id=Test means NamespaceCurrent=Test and id=Test
  422. # http://example.org/cgi-bin/wiki.pl/Test/Test means NamespaceCurrent=Test and id=Test
  423. # In this case GetId() will have set the parameter Test to 1.
  424. # http://example.org/cgi-bin/wiki.pl/Test?rollback-1234=foo
  425. # This doesn't set the Test parameter.
  426. return if $id and $UsePathInfo and $id eq $NamespaceCurrent and not GetParam($id) and not GetParam('ns');
  427. return $id;
  428. }