toc.pl 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. #!/usr/bin/env perl
  2. use strict;
  3. use v5.10;
  4. our ($q, $bol, @HtmlStack, @MyRules, @MyInitVariables);
  5. # ====================[ toc.pl ]====================
  6. =head1 NAME
  7. toc - An Oddmuse module for adding a "Table of Contents" to Oddmuse Wiki pages.
  8. =head1 INSTALLATION
  9. toc is easily installable; move this file into the B<wiki/modules/>
  10. directory for your Oddmuse Wiki.
  11. =cut
  12. AddModuleDescription('toc.pl', 'Table of Contents Extension');
  13. # ....................{ CONFIGURATION }....................
  14. =head1 CONFIGURATION
  15. toc is easily configurable; set these variables in the B<wiki/config.pl> file
  16. for your Oddmuse Wiki.
  17. =cut
  18. our ($TocHeaderText,
  19. $TocClass,
  20. $TocAutomatic,
  21. $TocAnchorPrefix,
  22. $TocIsApplyingAutomaticRules);
  23. =head2 $TocHeaderText
  24. The string to be displayed as the header for each page's table of contents.
  25. =cut
  26. $TocHeaderText = 'Contents';
  27. =head2 $TocClass
  28. The string to be used as the HTML class for each page's table of contents. (This
  29. is the string with which your CSS stylesheet customizes table of contents.)
  30. =cut
  31. $TocClass = 'toc';
  32. =head2 $TocAutomatic
  33. A boolean that, if true, automatically prepends the table of contents to the
  34. first header for a page or, if false, does not. If false, you must explicitly
  35. add the table of contents to each page for which you'd like one by explicitly
  36. adding the "<toc>" markup to that page.
  37. By default, this boolean is true.
  38. =cut
  39. $TocAutomatic = 1;
  40. =head2 $TocAnchorPrefix
  41. The string for prefixing the names of toc anchor links with. The default
  42. should be fine, generally; it creates toc anchor links resembling:
  43. =over
  44. =item L<http://your.wiki.com/SomePage#Heading1>. A link to the first header on
  45. SomePage page for some wiki.
  46. =item L<http://your.wiki.com/SomePage#Heading2>. A link to the second header on
  47. SomePage page for some wiki.
  48. =back
  49. And so on. This provides Wiki users a "clean" mechanism for bookmarking,
  50. marking, and sharing links to particular segments of a Wiki page.
  51. =cut
  52. $TocAnchorPrefix = 'Heading';
  53. =head2 $TocIsApplyingAutomaticRules
  54. A boolean that, if true, performs a few "automatic" rules on behalf of this
  55. extension. These are:
  56. =over
  57. =item Add a unique C<id="${ID}"> attribute to each header tag on every page.
  58. This ensures that every link in the table of contents, for every page,
  59. refers to one and only one header tag in that page.
  60. =item Add an automatic table of contents to every page, if the
  61. C<$TocAutomatic> boolean is also enabled.
  62. =back
  63. By default, this boolean is true. (This is a good thing. Unless you know what
  64. you're doing, you should probably leave this as is.)
  65. =cut
  66. $TocIsApplyingAutomaticRules = 1;
  67. # ....................{ INITIALIZATION }....................
  68. push(@MyInitVariables, \&TocInit);
  69. # A number uniquely identifying this current header. This allows us to link each
  70. # list entry in the table of contents to the header it refers to.
  71. my $TocHeaderNumber;
  72. sub TocInit {
  73. $TocHeaderNumber = '';
  74. }
  75. # ....................{ MARKUP }....................
  76. *RunMyRulesTocOld = \&RunMyRules;
  77. *RunMyRules = \&RunMyRulesToc;
  78. push(@MyRules, \&TocRule);
  79. =head2 MARKUP
  80. toc handles page markup resembling:
  81. <toc header_text="$HeaderText" class="$Class">
  82. Or, in its abbreviated form:
  83. <toc "$HeaderText" "$Class">
  84. Or, in its maximally abbreviated form:
  85. <toc>
  86. C<$HeaderText> is the header text for this table of contents: that is, text
  87. heading the list of this table of contents. This is optional. If not specified,
  88. it defaults to the value of the C<$TocHeaderText> variable.
  89. C<$Class> is the HTML class for this table of contents, for CSS stylization of
  90. that table. This is optional. If not specified, it defaults to "toc".
  91. =cut
  92. sub TocRule {
  93. # <toc...> markup. This explicitly displays a table of contents at this point.
  94. if ($bol and
  95. m~\G&lt;toc(/([A-Za-z\x{0080}-\x{fffd}/]+))? # $1
  96. (\s+(?:header_text\s*=\s*)?"(.+?)")? # $3
  97. (\s+(?:class\s*=\s*)?"(.+?)")? # $5
  98. &gt;[ \t]*(\n|$)~cgx) { # $7
  99. my ($toc_class_old, $toc_header_text, $toc_class) = ($2, $4, $6);
  100. $TocHeaderNumber = 1;
  101. $toc_header_text = $TocHeaderText if not defined $toc_header_text;
  102. # A backwards-compatibility fix! Antiquated versions of this module
  103. # accepted markup resembling:
  104. # <toc/${CLASS_NAME_1}/${CLASS_NAME_2}/...>
  105. #
  106. # which this conditional converts to the more conventional:
  107. # <toc class="${CLASS_NAME_1} ${CLASS_NAME_2} ...">
  108. if ($toc_class_old) {
  109. $toc_class = $toc_class_old;
  110. $toc_class =~ tr~/~ ~;
  111. } $toc_class = $TocClass.($toc_class ? ' '.$toc_class : '');
  112. # If the topmost HTML tag is a paragraph, then the table of contents will
  113. # be the first child element of that paragraph; however, embedding that
  114. # table in a paragraph is quite unnecessary, and even obstructs our
  115. # CSS stylization of that table elsewhere. In this case, we close this
  116. # paragraph; this ensures that paragraph will have no content and
  117. # therefore be removed, later, by the Oddmuse engine. This is slightly
  118. # hacky -- but sufficiently necessary.
  119. return ($HtmlStack[0] eq 'p' ? CloseHtmlEnvironment() : '')
  120. .qq{<!-- toc header_text="$toc_header_text" class="$toc_class" -->}
  121. .AddHtmlEnvironment('p');
  122. } return;
  123. }
  124. =head2 RunMyRulesToc
  125. Automates insertion of the <toc ...> markup for Wiki pages not explicitly
  126. specifying it. This searches the current page's HTML output for the first HTML
  127. header tag for that page and, when found, automatically inserts <toc ...> markup
  128. immediately before that tag.
  129. =cut
  130. sub RunMyRulesToc {
  131. my $html = RunMyRulesTocOld(@_);
  132. # Some markup rule converted the input Wiki markup into HTML. If this HTML is
  133. # an HTML header tag, then we add a new "id" tag attribute to it (so as to
  134. # uniquely identify it for later linking to from the table of contents).
  135. # to the user, without embellishments or change.
  136. if ($TocIsApplyingAutomaticRules and $html) {
  137. if ($TocAutomatic and not $TocHeaderNumber and $bol and $html =~
  138. s~(<h[1-6][^>]*>)
  139. ~<!-- toc header_text="$TocHeaderText" class="$TocClass" -->$1~x) {
  140. $TocHeaderNumber = 1;
  141. }
  142. # If we've seen at least one HTML header and we're not currently in the
  143. # sidebar (as is the odd case when $TocPageName ne $OpenPageName), then
  144. # add a unique identifier to all (possible) HTML headers in this string.
  145. if ($TocHeaderNumber) {
  146. # To avoid infinite substitution recursion, we avoid matching header tags
  147. # already having id attributes. Unfortunately, I'm not as adept a regular
  148. # expression wizard as I should be, and was unable to get a negative
  149. # lookahead expression resembling (?!\s+id=".*?") to work. As such, I
  150. # use a simple negative character class hack. *shrug*
  151. while ($html =~ s~<h([1-6](\s+[^i]\w+\s+=\s+"[^"]")*)>
  152. ~<h$1 id="$TocAnchorPrefix$TocHeaderNumber">~gx) {
  153. $TocHeaderNumber++;
  154. }
  155. }
  156. }
  157. return $html;
  158. }
  159. # ....................{ MARKUP =after }....................
  160. my $TocCommentPattern = qr~\Q<!-- toc\E.*?\Q -->\E~;
  161. *OldTocApplyRules = \&ApplyRules;
  162. *ApplyRules = \&NewTocApplyRules;
  163. # This changes the entire rendering engine such that it no longer
  164. # prints output as it goes along. Instead all the output is collected
  165. # in $html, post-processed by inserting the table of contents where
  166. # appropriate, and then printed at the very end.
  167. sub NewTocApplyRules {
  168. my ($html, $blocks, $flags);
  169. {
  170. local *STDOUT;
  171. my $html_unfixed;
  172. open( STDOUT, '>', \$html_unfixed) or die "Can't open memory file: $!";
  173. binmode STDOUT, ":encoding(UTF-8)";
  174. ($blocks, $flags) = map { decode_utf8($_) } OldTocApplyRules(@_);
  175. close STDOUT;
  176. # do not delete!
  177. $html = decode_utf8($html_unfixed);
  178. }
  179. # If there are at least two HTML headers on this page, insert a table of
  180. # contents.
  181. if ($TocHeaderNumber > 2) {
  182. $html =~ s~\Q<!-- toc header_text="\E([^"]+)\Q" class="\E([^"]+)\Q" -->\E~
  183. GetTocHtml(\$html, \$blocks, $1, $2)~eg;
  184. }
  185. # Otherwise, remove the table of contents placeholder comments.
  186. else {
  187. $html =~ s~$TocCommentPattern~~g;
  188. $blocks =~ s~$TocCommentPattern~~g;
  189. }
  190. print $html;
  191. return ($blocks, $flags);
  192. }
  193. sub GetTocHtml {
  194. my ($html_, $blocks_, $toc_header_text, $toc_class) = @_;
  195. my $toc_html =
  196. $q->start_div({-class=> $toc_class})
  197. .$q->h2(T($toc_header_text));
  198. # This forces evaluation of the "while ($list_depth < $header_depth) {"
  199. # clause on the first iteration of the outer while loop. Yes: trust us.
  200. my $list_depth = 0;
  201. while ($$html_ =~ m~<h([1-6])[^>]* id="($TocAnchorPrefix\d+)">(.*?)</h\1>~cg) {
  202. my ($header_depth, $header_id, $header_text) = ($1, $2, $3);
  203. # Strip all links from header text. (They unnecessarily convolute the
  204. # interface, since the header text is already embedded in a link to the
  205. # appropriate header in the dialogue script's body.)
  206. $header_text =~ s~<a[^>]*>(.*?)</a>~$1~;
  207. # By Usemod convention, all headers begin with depth 2. This algorithm,
  208. # however, expects headers to begin with depth 1. Thus, to "streamline"
  209. # things, we transform it appropriately. ;-)
  210. $header_depth-- if defined &UsemodRule or defined &CreoleRule;
  211. # If this is the first header and if this header's depth is deeper than 1,
  212. # we manually clamp this header's depth to 1 so as to ensure the first list
  213. # item in the first ordered list resides at depth 1. (Failure to do this
  214. # produces very odd ordered lists.)
  215. if (not $list_depth and $header_depth > 1) { $header_depth = 1; }
  216. # Close ordered lists and list items for prior headings deeper than this
  217. # heading's depth.
  218. while ($list_depth > $header_depth and $list_depth != 1) {
  219. $list_depth--;
  220. $toc_html .= '</li></ol>';
  221. }
  222. # If the current ordered list is at this heading's depth, add this heading
  223. # as a list item to that list.
  224. if ($list_depth == $header_depth) {
  225. $toc_html .= '</li><li>';
  226. }
  227. # Otherwise, add ordered lists and list items until at this heading's depth.
  228. else {
  229. while ($list_depth < $header_depth) {
  230. $list_depth++;
  231. $toc_html .= '<ol><li>';
  232. }
  233. }
  234. $toc_html .= "<a href=\"#$header_id\">$header_text</a>";
  235. }
  236. # Close ordered lists and list items for the last heading.
  237. while ($list_depth--) { $toc_html .= '</li></ol>'; }
  238. $toc_html .= $q->end_div();
  239. # Lastly, perform the same replacement on the cached version of this clean
  240. # block. (Failure to do this would ensure the first creation of this page
  241. # would emit the proper HTML, but all subsequent refreshings of this page
  242. # the improperly cached version.)
  243. $$blocks_ =~ s~$TocCommentPattern~$toc_html~;
  244. return $toc_html;
  245. }
  246. =head1 TODO
  247. This extension no longer cleanly integrates with the Sidebar extension, since
  248. this extension now prints the table of contents for a page after having printed
  249. all other content for that page (rather than while printing all content for that
  250. page, as was previously the case).
  251. This is not correctable, unfortunately. The simplest solution is to suggest that
  252. current Sidebar users migrate to the Crossbar module -- and that is where I
  253. leave it.
  254. =head1 COPYRIGHT AND LICENSE
  255. The information below applies to everything in this distribution,
  256. except where noted.
  257. Copyleft 2008 by B.w.Curry <http://www.raiazome.com>.
  258. Copyright 2004, 2005, 2006, 2007 by Alex Schroeder <alex@emacswiki.org>.
  259. This program is free software; you can redistribute it and/or modify
  260. it under the terms of the GNU General Public License as published by
  261. the Free Software Foundation; either version 3 of the License, or
  262. (at your option) any later version.
  263. This program is distributed in the hope that it will be useful,
  264. but WITHOUT ANY WARRANTY; without even the implied warranty of
  265. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  266. GNU General Public License for more details.
  267. You should have received a copy of the GNU General Public License
  268. along with this program. If not, see L<http://www.gnu.org/licenses/>.
  269. =cut