T2h_l2h.pm 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. #+##############################################################################
  2. #
  3. # T2h_l2h.pm: interface to LaTeX2HTML
  4. #
  5. # Copyright (C) 1999, 2000, 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation; either version 3 of the License,
  10. # or (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. #
  20. # This code was taken from the main texi2html file in 2006.
  21. # Certainly originally written by Olaf Bachmann.
  22. #
  23. #-##############################################################################
  24. require 5.0;
  25. use strict;
  26. package Texi2HTML::LaTeX2HTML;
  27. use Cwd;
  28. # latex2html conversions consist of three stages:
  29. # 1) to latex: Put "latex" code into a latex file
  30. # (init, to_latex, finish_to_latex)
  31. # 2) to html: Use latex2html to generate corresponding html code and images
  32. # (to_html)
  33. # 3) from html: Extract generated code and images from latex2html run
  34. # (init_from_html, do_tex)
  35. # init l2h defaults for files and names
  36. # global variable used for caching
  37. # FIXME there is no reason for this variable to be global
  use vars qw(%l2h_cache);

  # Names and paths used by one l2h run; all of them are set in init().
  my $l2h_name;          # common base name of the l2h files
  my $l2h_latex_file;    # latex file written by to_latex
  my $l2h_cache_file;    # perl file holding the cached conversions
  my $l2h_html_file;     # html file read back in init_from_html
  my $l2h_prefix;        # value passed to latex2html with -prefix

  # holds the status of latex2html operations. If 0 it means that there was
  # an error
  my $status = 0;

  # settings read in init()
  my ($debug, $verbose);

  # destination directory and base name of the document, set in init()
  my ($docu_rdir, $docu_name);

  # error counters, incremented in do_tex and reported in finish
  my ($extract_error_count, $invalid_counter_count);

  # change_image_file_names
  my %l2h_img;       # associate src file to destination file
                     # such that files are not copied twice
  my $image_count;   # number tried next when naming a destination image

  # do_tex
  my $html_output_count = 0;    # html text outputed in html result file
  58. ##########################
  59. #
  60. # First stage: Generation of Latex file
  61. # Initialize with: init
  62. # Add content with: to_latex ($text) --> HTML placeholder comment
  63. # Finish with: finish_to_latex
  64. #
  # Text written at the very beginning of the generated latex file.
  my $l2h_latex_preamble = join("\n",
      '% This document was automatically generated by the l2h extenstion of texi2html',
      '% DO NOT EDIT !!!',
      '\documentclass{article}',
      '\usepackage{html}',
      '\begin{document}',
      '');

  # Text written at the very end of the generated latex file.
  my $l2h_latex_closing = '\end{document}' . "\n";

  # %l2h_to_latex associates a latex text with its index in the html result
  # array; @l2h_to_latex associates the index with the original latex text.
  my (%l2h_to_latex, @l2h_to_latex);

  # $latex_count:           number of latex texts really stored
  # $latex_converted_count: number of latex texts passed through latex2html
  # $to_latex_count:        total number of latex texts processed
  # $cached_count:          number of cached latex texts
  my ($latex_count, $latex_converted_count, $to_latex_count, $cached_count)
      = (0, 0, 0, 0);

  # the cache hash. Associate latex text with html from the previous run
  %l2h_cache = ();

  # array of resulting html
  my @l2h_from_html;

  # associate a command name and the corresponding counter to the index in
  # the html result array
  my %global_count;
  # Reset the whole module state and prepare a new l2h run.
  # $status is left at 0 when the output goes to a null device or when the
  # latex file cannot be opened; otherwise it is set to 1 at the very end.
  sub init()
  {
      # latex texts, resulting html, cache and image bookkeeping
      %l2h_to_latex  = ();
      @l2h_to_latex  = ();
      @l2h_from_html = ();
      %global_count  = ();
      %l2h_cache     = ();
      %l2h_img       = ();

      # counters
      $latex_count = $latex_converted_count = $to_latex_count = $cached_count = 0;
      $extract_error_count = $invalid_counter_count = 0;
      $html_output_count = 0;
      $image_count = 1;

      $status = 0;

      # nothing to prepare when the top file is a null device
      my $top_file = $Texi2HTML::THISDOC{'filename'}->{'top'};
      return if ($Texi2HTML::Config::null_device_file{$top_file});

      $docu_name = $Texi2HTML::THISDOC{'file_base_name'};
      $docu_rdir = $Texi2HTML::THISDOC{'destination_directory'};
      $docu_rdir = '' if (!defined($docu_rdir));

      # every l2h file lives in the destination directory -- generated images
      # are put there, it should be the same as the dir of the enclosing html
      # document
      $l2h_name       = $docu_name . '_l2h';
      $l2h_prefix     = $l2h_name . '_';
      $l2h_latex_file = $docu_rdir . $l2h_name . '.tex';
      $l2h_html_file  = $docu_rdir . $l2h_name . '.html';
      $l2h_cache_file = $docu_rdir . $docu_name . '-l2h_cache.pm';

      $debug   = $Texi2HTML::THISDOC{'debug_l2h'};
      $verbose = Texi2HTML::Config::get_conf('VERBOSE');

      my $skip_l2h = Texi2HTML::Config::get_conf('L2H_SKIP');
      if (!$skip_l2h)
      {
          if (!open(L2H_LATEX, ">$l2h_latex_file"))
          {
              main::document_error ("l2h: Can't open latex file '$l2h_latex_file' for writing: $!");
              $status = 0;
              return;
          }
          warn "# l2h: use ${l2h_latex_file} as latex file\n" if ($verbose);
          print L2H_LATEX $l2h_latex_preamble;
      }

      # open the database that holds cached text; it is not loaded when
      # L2H_SKIP is defined but false
      init_cache() if (!defined($skip_l2h) or $skip_l2h);

      $status = 1;
  }
  # Register a latex text (2nd arg) for the command (1st arg) numbered by the
  # counter (3rd arg). The text is printed into the latex file unless it was
  # already registered or is available from the cache, so that it can later be
  # replaced by the latex2html generated text.
  # Returns 1, or nothing when l2h is not active.
  sub to_latex($$$)
  {
      my ($command, $text, $counter) = @_;
      return unless ($status);

      $text .= ' ' if ($command eq 'tex');
      $text = '$' . $text . '$' if ($command eq 'math');

      $to_latex_count++;
      $text =~ s/(\s*)$//;

      # try whether we have text already on things to do
      my $count = $l2h_to_latex{$text};
      if (!$count)
      {
          $count = ++$latex_count;

          # try whether we can get it from cache
          my $cached_text = from_cache($text);
          if (!defined($cached_text))
          {
              $latex_converted_count++;
              if (!Texi2HTML::Config::get_conf('L2H_SKIP'))
              {
                  # the text is enclosed in html comments carrying its index,
                  # they are searched for in the latex2html output
                  foreach my $chunk (
                      "\\begin{rawhtml}\n\n",
                      "<!-- l2h_begin $l2h_name $count -->\n",
                      "\\end{rawhtml}\n",
                      "$text\n",
                      "\\begin{rawhtml}\n",
                      "<!-- l2h_end $l2h_name $count -->\n\n",
                      "\\end{rawhtml}\n")
                  {
                      print L2H_LATEX $chunk;
                  }
              }
          }
          else
          {
              $cached_count++;
              # put the cached result in the html result array
              $l2h_from_html[$count] = $cached_text;
          }

          $l2h_to_latex[$count] = $text;
          $l2h_to_latex{$text} = $count;
      }

      # remember which html result belongs to this command occurrence
      $global_count{"${command}_$counter"} = $count;
      return 1;
  }
  # Print the closing text into the latex file and close it.
  # Returns 1 when at least one latex text was stored. Otherwise finish()
  # is called and 0 is returned.
  sub finish_to_latex()
  {
      # texts seen more than once are neither converted nor cached again
      my $reused = $to_latex_count - $latex_converted_count - $cached_count;

      if (!Texi2HTML::Config::get_conf('L2H_SKIP'))
      {
          print L2H_LATEX $l2h_latex_closing;
          close (L2H_LATEX);
      }
      warn "# l2h: finished to latex ($cached_count cached, $reused reused, $latex_converted_count to process)\n" if ($verbose);

      return 1 if ($latex_count);

      # no @tex nor @math
      finish();
      return 0;
  }
  ###################################
  # Second stage: Use latex2html to generate corresponding html code and images
  #
  # to_html():
  # Call latex2html on $l2h_latex_file; the files it generates are prefixed
  # with $l2h_prefix and put in $docu_rdir if it is not empty.
  # Return 1, on success (or when there is nothing to run)
  #        0, otherwise
  #
  sub to_html()
  {
      # when there are no tex constructs to convert (happens in case everything
      # comes from the cache), there is no latex2html run
      if (Texi2HTML::Config::get_conf('L2H_SKIP') or ($latex_converted_count == 0))
      {
          warn "# l2h: skipping latex2html run\n" if ($verbose);
          return 1;
      }

      # Check for dot in directory where dvips will work. There is no attempt
      # to fix it (for example by using /tmp as L2H_TMP), the run is given up.
      my $l2h_tmp = Texi2HTML::Config::get_conf('L2H_TMP');
      if ($l2h_tmp)
      {
          if ($l2h_tmp =~ /\./)
          {
              main::document_warn ("l2h: l2h_tmp dir contains a dot.");
              return 0;
          }
      }
      elsif (cwd() =~ /\./)
      {
          main::document_warn ("l2h: current dir contains a dot.");
          return 0;
      }

      my $call = Texi2HTML::Config::get_conf('L2H_L2H');

      # use init file, if specified
      my $init_file = Texi2HTML::Config::get_conf('L2H_FILE');
      if (defined($init_file) and $init_file ne '' and -f $init_file and -r $init_file)
      {
          $call = $call . " -init_file " . $init_file;
      }

      # set output dir
      if ($docu_rdir ne '')
      {
          $call .= " -dir $docu_rdir";
      }
      else
      {
          $call .= " -no_subdir";
      }

      # use l2h_tmp, if specified
      $call .= " -tmp ".$l2h_tmp if (defined($l2h_tmp) and $l2h_tmp ne '');

      # use a given html version if specified
      my $html_version = Texi2HTML::Config::get_conf('L2H_HTML_VERSION');
      $call .= " -html_version ".$html_version
          if (defined($html_version) and $html_version ne '');

      # options we want to be sure of
      $call .= " -address 0 -info 0 -split 0 -no_navigation -no_auto_link";
      $call .= " -prefix $l2h_prefix $l2h_latex_file";

      warn "# l2h: executing '$call'\n" if ($verbose);
      if (system($call))
      {
          main::document_error ("l2h: '${call}' did not succeed");
          return 0;
      }

      warn "# l2h: latex2html finished successfully\n" if ($verbose);
      return 1;
  }
  279. ##########################
  280. # Third stage: Extract generated contents from latex2html run
  281. # Initialize with: init_from_html
  282. # open $l2h_html_file for reading
  283. # reads in contents into array indexed by numbers
  284. # return 1, on success -- 0, otherwise
  285. # Finish with: finish
  286. # closes $l2h_html_dir/$l2h_name.".$docu_ext"
  # the images generated by latex2html have names like ${docu_name}_l2h_img?.png
  # they are moved to ${docu_name}_?.png (copied when debugging), and html is
  # changed accordingly.
  # FIXME is it really necessary to bother doing that? Looks like an unneeded
  # complication to me (pertusus, 2009), and it could go bad if there is some
  # SRC="(.*?)" in the text (though the regexp could be made more specific).
  #
  # Argument: the html text extracted for one latex text.
  # Returns the html text with the SRC attributes of the renamed images updated.
  # %l2h_img associates src file to destination file such that files are not
  # copied twice.
  sub change_image_file_names($)
  {
      my $content = shift;
      my @images = ($content =~ /SRC="(.*?)"/g);
      my $doc_ext = Texi2HTML::Config::get_conf('EXTENSION');

      for my $src (@images)
      {
          my $dest = $l2h_img{$src};
          unless ($dest)
          {
              my ($img_ext) = ($src =~ /.*\.(.*)$/);
              if (!defined($img_ext) or (defined($doc_ext) and $img_ext eq $doc_ext))
              { # A warning when the image extension is the same than the
                # document extension. copying the file could result in
                # overwriting an output file (almost surely if the default
                # texi2html file names are used).
                  main::document_warn ("L2h image $src has invalid extension");
                  next;
              }
              my $ext = ".$img_ext";

              # find a destination name that is not in use
              while (-e "$docu_rdir${docu_name}_${image_count}$ext")
              {
                  $image_count++;
              }
              $dest = "${docu_name}_${image_count}$ext";

              my $src_path = "$docu_rdir$src";
              my $dest_path = "$docu_rdir$dest";
              my $moved;
              if ($debug)
              {
                  # not portable, but only used with debug. The list form
                  # avoids the shell, so unusual characters in the paths
                  # are passed unchanged to cp.
                  $moved = (system('cp', '-f', $src_path, $dest_path) == 0);
              }
              else
              {
                  $moved = rename ($src_path, $dest_path);
              }
              unless ($moved)
              {
                  # leave the reference untouched instead of pointing the
                  # html to a file that was not created
                  main::document_warn ("l2h: could not move image $src_path to $dest_path");
                  next;
              }
              $l2h_img{$src} = $dest;
          }
          # the file name is quoted, it is not a regular expression
          $content =~ s/SRC="\Q$src\E"/SRC="$dest"/g;
      }
      return $content;
  }
  # Read the html file generated by latex2html and store the text found
  # between each pair of l2h_begin/l2h_end comments in the html result array
  # and in the cache hash, after the image file names have been changed.
  # Returns 1 on success (also when there was nothing to convert), 0 when the
  # html file cannot be opened or when a closing comment is missing.
  sub init_from_html()
  {
      # when there are no tex constructs to convert (happens in case everything
      # comes from the cache), the html file that was generated by previous
      # latex2html runs isn't reused.
      if ($latex_converted_count == 0)
      {
          return 1;
      }

      if (! open(L2H_HTML, "<$l2h_html_file"))
      {
          main::document_warn ("l2h: Can't open $l2h_html_file for reading");
          return 0;
      }
      warn "# l2h: use $l2h_html_file as html file\n" if ($verbose);

      my $html_converted_count = 0; # number of html resulting texts
                                    # retrieved in the file
      my ($count, $h_line);
      while ($h_line = <L2H_HTML>)
      {
          # $l2h_name is built from the document name and may contain regexp
          # metacharacters, it has to be matched literally.
          if ($h_line =~ /!-- l2h_begin \Q$l2h_name\E ([0-9]+) --/)
          {
              $count = $1;
              my $h_content = '';
              my $h_end_found = 0;
              while ($h_line = <L2H_HTML>)
              {
                  if ($h_line =~ /!-- l2h_end \Q$l2h_name\E $count --/)
                  {
                      $h_end_found = 1;
                      chomp $h_content;
                      chomp $h_content;
                      $html_converted_count++;
                      # transform image file names and copy image files
                      $h_content = change_image_file_names($h_content);
                      # store result in the html result array
                      $l2h_from_html[$count] = $h_content;
                      # also add the result in cache hash, unless the index
                      # does not correspond to a registered latex text
                      $l2h_cache{$l2h_to_latex[$count]} = $h_content
                          if (defined($l2h_to_latex[$count]));
                      last;
                  }
                  $h_content = $h_content.$h_line;
              }
              unless ($h_end_found)
              { # couldn't found the closing comment. Certainly a bug.
                  main::msg_debug ("l2h: l2h_end $l2h_name $count not found");
                  close(L2H_HTML);
                  return 0;
              }
          }
      }

      # Not the same number of converted elements and retrieved elements
      if ($latex_converted_count != $html_converted_count)
      {
          main::msg_debug ("l2h: waiting for $latex_converted_count elements found $html_converted_count");
      }
      warn "# l2h: Got $html_converted_count of $latex_count html contents\n"
          if ($verbose);

      close(L2H_HTML);
      return 1;
  }
  400. # $html_output_count = 0; # html text outputed in html result file
  # Called each time a construct handled by latex2html is encountered, returns
  # the corresponding html.
  # Arguments: the command name, its counter and the state hash reference.
  # Returns nothing when l2h is not active, and an empty string (a comment
  # when debugging) when no valid index is associated with the construct.
  sub do_tex($$$$)
  {
      my ($style, $counter, $state) = @_;
      return unless ($status);

      my $count = $global_count{"${style}_$counter"};

      ################################## begin debug section (incorrect counts)
      unless (defined($count))
      {
          # counter is undefined
          $invalid_counter_count++;
          main::msg_debug ("l2h: undefined count for ${style}_$counter");
          return ($debug
              ? "<!-- l2h: ". __LINE__ . " undef count for ${style}_$counter -->"
              : '');
      }
      if (($count <= 0) or ($count > $latex_count))
      {
          # counter out of range
          $invalid_counter_count++;
          main::msg_debug ("l2h: Request of $count content which is out of valide range [0,$latex_count)");
          return ($debug
              ? "<!-- l2h: ". __LINE__ . " out of range count $count -->"
              : '');
      }
      ################################## end debug section (incorrect counts)

      # this seems to be a valid counter
      my $result = ($debug ? "<!-- l2h_begin $l2h_name $count -->" : '');
      if (!defined($l2h_from_html[$count]))
      {
          # if the result is not in @l2h_from_html, there is an error somewhere.
          $extract_error_count++;
          main::msg_debug ("l2h: can't extract content $count from html");
          # try simple (ordinary) substitution (without l2h)
          $result .= "<!-- l2h: ". __LINE__ . " use texi2html -->" if ($debug);
          $result .= main::substitute_text({}, undef, 'error in l2h', $l2h_to_latex[$count]);
      }
      else
      {
          $html_output_count++;
          # maybe we could also have something if simple_format
          # with Texi2HTML::Config::protect_text in case there
          # was some @math on a line passed through simple_format.
          # This would certainly be illegal texinfo, however.
          #
          # with remove_texi the original latex text is output and nothing
          # is protected
          $result .= ($state->{'remove_texi'}
              ? $l2h_to_latex[$count]
              : $l2h_from_html[$count]);
      }
      $result .= "<!-- l2h_end $l2h_name $count -->" if ($debug);
      return $result;
  }
  # Report the outcome of the run when verbose, store the results in the cache
  # and remove the temporary files if L2H_CLEAN is set.
  # Returns 1, or nothing when l2h is not active.
  sub finish()
  {
      return unless($status);

      if ($verbose)
      {
          warn (($extract_error_count + $invalid_counter_count)
              ? "# l2h: finished from html ($extract_error_count extract and $invalid_counter_count invalid counter errors)\n"
              : "# l2h: finished from html (no error)\n");

          # this may happen if @-commands are collected at some places
          # but @-command at those places are not expanded later. For
          # example @math on @multitable lines.
          warn "# l2h: $html_output_count html outputed for $latex_converted_count converted\n"
              if ($html_output_count != $latex_converted_count);
      }

      store_cache();

      if (Texi2HTML::Config::get_conf('L2H_CLEAN'))
      {
          warn "# l2h: removing temporary files generated by l2h extension\n"
              if $verbose;
          foreach my $leftover (<"$docu_rdir$l2h_name"*>)
          {
              # FIXME error condition not checked
              unlink $leftover;
          }
      }

      warn "# l2h: Finished\n" if $verbose;
      return 1;
  }
  # the driver of end of first pass and second pass: the latex file is
  # finished, then latex2html is run. $status records the result of each
  # step, and the second step is skipped when the first one failed.
  sub latex2html()
  {
      return unless($status);

      $status = finish_to_latex();
      return unless ($status);

      $status = to_html();
      return unless ($status);
  }
  505. ##############################
  506. # stuff for l2h caching
  507. #
  508. # FIXME it is clear that l2h stuff takes very long compared with texi2html
  509. # which is already quite long. However this also adds some complexity
  510. # I tried doing this with a dbm data base, but it did not store all
  511. # keys/values. Hence, I did as latex2html does it
  # Load the cache file written by store_cache, if it is readable. The file
  # is perl code run with do, it is expected to fill %l2h_cache.
  sub init_cache
  {
      return unless (-r "$l2h_cache_file");

      # do searches @INC for relative paths that do not begin with ./ or ../
      # and the current directory is not in @INC anymore since perl 5.26,
      # so such a path is made explicitly relative to the current directory.
      require File::Spec;
      my $cache_path = $l2h_cache_file;
      unless (File::Spec->file_name_is_absolute($cache_path)
              or $cache_path =~ m{^\.\.?/})
      {
          $cache_path = "./$cache_path";
      }

      my $rdo = do "$cache_path";
      unless ($rdo)
      {
          # $@ is set when the file could not be compiled, $! when it could
          # not be read
          my $reason = ($@ ne '') ? $@
                     : (defined($rdo) ? 'false value returned' : $!);
          main::document_error ("l2h: could not load $l2h_cache_file: $reason");
      }
  }
  # store all the text obtained through latex2html
  #
  # The cache file is perl code. For each entry of %l2h_cache, sorted by key,
  # the latex text is assigned to $l2h_cache_key as a q/.../ string and the
  # html to the corresponding hash element as a q|...| string. The file ends
  # with a true value such that it can be loaded with do.
  # Nothing is written when no latex text was stored.
  sub store_cache
  {
      return unless $latex_count;

      unless (open(FH, ">$l2h_cache_file"))
      {
          main::document_error ("l2h: could not open $l2h_cache_file for writing: $!");
          return;
      }

      foreach my $latex_text (sort(keys(%l2h_cache)))
      {
          my $key = $latex_text;
          my $value = $l2h_cache{$latex_text};
          $value = '' unless (defined($value));

          # In a q string a backslash is special only in front of the
          # delimiter or of another backslash. Escaping every backslash and
          # every delimiter therefore gives back exactly the original text,
          # including a backslash at the very end of the string.
          $key =~ s{([\\/])}{\\$1}g;
          $value =~ s{([\\|])}{\\$1}g;

          print FH "\n\$l2h_cache_key = q/$key/;\n";
          print FH "\$l2h_cache{\$l2h_cache_key} = q|$value|;\n";
      }
      print FH "1;";

      # write errors on a buffered handle may only show up when closing
      unless (close (FH))
      {
          main::document_error ("l2h: could not write $l2h_cache_file: $!");
      }
  }
  # Look up the html cached for the latex text given in argument.
  # The cached html is returned only if every file appearing in one of its
  # SRC attributes exists in the destination directory, undef is returned
  # otherwise.
  sub from_cache($)
  {
      my $text = shift;

      my $cached = $l2h_cache{$text};
      return undef unless (defined($cached));

      # a missing picture invalidates the cached html
      foreach my $image ($cached =~ m/SRC="(.*?)"/g)
      {
          return undef unless (-e "$docu_rdir$image");
      }
      return $cached;
  }
  568. 1;