123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624 |
- #+##############################################################################
- #
- # T2h_l2h.pm: interface to LaTeX2HTML
- #
- # Copyright (C) 1999, 2000, 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 3 of the License,
- # or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- # This code was taken from the main texi2html file in 2006.
- # Certainly originally written by Olaf Bachmann.
- #
- #-##############################################################################
- require 5.0;
- use strict;
- package Texi2HTML::LaTeX2HTML;
- use Cwd;
- # latex2html conversions consist of three stages:
- # 1) to latex: Put "latex" code into a latex file
- # (init, to_latex, finish_to_latex)
- # 2) to html: Use latex2html to generate corresponding html code and images
- # (to_html)
- # 3) from html: Extract generated code and images from latex2html run
- # (init_from_html, do_tex)
# State shared by the conversion stages.  The variables below are set
# by init() for each document.

# Cache associating a latex text with the html obtained in a previous run.
# FIXME there is no reason for this variable to be global
# NOTE(review): the cache file written by store_cache() assigns to
# %l2h_cache by name and is loaded with do() in init_cache(); a file loaded
# that way cannot see lexical variables -- confirm before making it a "my".
use vars qw(%l2h_cache);

# Names derived from the document name: base name of the l2h files, latex
# file, cache file, html result file and prefix given to latex2html.
my $l2h_name;
my $l2h_latex_file;
my $l2h_cache_file;
my $l2h_html_file;
my $l2h_prefix;

# Status of the latex2html operations.  0 means that there was an error.
my $status = 0;

# debug and verbosity settings
my ($debug, $verbose);

# destination directory and base name of the document
my ($docu_rdir, $docu_name);

# error counters: incremented in do_tex(), reported in finish()
my ($extract_error_count, $invalid_counter_count);

# change_image_file_names: associate src file to destination file such
# that files are not copied twice, and number tried for the next image.
my %l2h_img;
my $image_count;

# do_tex: number of html texts output in the html result file
my $html_output_count = 0;
##########################
#
# First stage: generation of the latex file
#   init()                  resets the state and opens the latex file
#   to_latex(...)           adds one latex text, to be replaced later on by
#                           the html found between placeholder comments
#   finish_to_latex()       writes the closing and closes the latex file
#

# beginning of the generated latex file
my $l2h_latex_preamble = <<EOT;
% This document was automatically generated by the l2h extenstion of texi2html
% DO NOT EDIT !!!
\\documentclass{article}
\\usepackage{html}
\\begin{document}
EOT

# end of the generated latex file
my $l2h_latex_closing = <<EOT;
\\end{document}
EOT

# latex text => index in the html result array
my %l2h_to_latex = ();
# index in the html result array => original latex text
my @l2h_to_latex = ();

# counters of the first stage:
#   $latex_count            latex texts really stored
#   $latex_converted_count  latex texts passed through latex2html
#   $to_latex_count         latex texts processed, in total
#   $cached_count           latex texts found in the cache
my ($latex_count, $latex_converted_count, $to_latex_count, $cached_count)
    = (0, 0, 0, 0);

# the cache hash: latex text => html from the previous run
%l2h_cache = ();

# resulting html, indexed like @l2h_to_latex
my @l2h_from_html;

# "<command name>_<counter>" => index in the html result array
my %global_count = ();
# Reset the l2h state for a new document and begin the first stage.
#
# Takes no argument.  $status is set to 1 if l2h could be initialized
# properly and is left at 0 otherwise; to_latex, do_tex, finish and
# latex2html do nothing while $status is 0.
sub init()
{
    # first stage
    %l2h_to_latex = ();         # latex text => index in the html result array
    @l2h_to_latex = ();         # index => original latex text
    $latex_count = 0;           # number of latex texts really stored
    $latex_converted_count = 0; # number of latex texts passed through latex2html
    $to_latex_count = 0;        # total number of latex texts processed
    $cached_count = 0;          # number of cached latex texts
    %l2h_cache = ();            # latex text => html from the previous run
    @l2h_from_html = ();        # array of resulting html
    %global_count = ();         # "<command>_<counter>" => index in the html
                                # result array
    # third stage
    $extract_error_count = 0;
    $invalid_counter_count = 0;
    %l2h_img = ();              # src file => destination file, such that
                                # files are not copied twice
    $image_count = 1;
    $html_output_count = 0;     # html texts output in the html result file

    $status = 0;

    # Nothing is set up when the top file name is registered in
    # %Texi2HTML::Config::null_device_file.
    return if ($Texi2HTML::Config::null_device_file{$Texi2HTML::THISDOC{'filename'}->{'top'}});

    $docu_name = $Texi2HTML::THISDOC{'file_base_name'};
    $docu_rdir = $Texi2HTML::THISDOC{'destination_directory'};
    $docu_rdir = '' if (!defined($docu_rdir));
    $l2h_name = "${docu_name}_l2h";
    $l2h_latex_file = "$docu_rdir${l2h_name}.tex";
    $l2h_cache_file = "${docu_rdir}${docu_name}-l2h_cache.pm";
    # destination dir -- generated images are put there, should be the same
    # as dir of enclosing html document --
    $l2h_html_file = "$docu_rdir${l2h_name}.html";
    $l2h_prefix = "${l2h_name}_";
    $debug = $Texi2HTML::THISDOC{'debug_l2h'};
    $verbose = Texi2HTML::Config::get_conf('VERBOSE');

    unless (Texi2HTML::Config::get_conf('L2H_SKIP'))
    {
        # Three-argument open: the file name is never parsed for a mode or
        # for surrounding white space.  The handle stays a bareword because
        # to_latex() and finish_to_latex() print to it and close it.
        unless (open(L2H_LATEX, '>', $l2h_latex_file))
        {
            main::document_error ("l2h: Can't open latex file '$l2h_latex_file' for writing: $!");
            $status = 0;
            return;
        }
        warn "# l2h: use ${l2h_latex_file} as latex file\n" if ($verbose);
        print L2H_LATEX $l2h_latex_preamble;
    }

    # Open the database that holds cached text.  The cache is read when
    # L2H_SKIP is undefined or true, not when it is defined and false.
    init_cache() if (!defined(Texi2HTML::Config::get_conf('L2H_SKIP')) or Texi2HTML::Config::get_conf('L2H_SKIP'));

    $status = 1;
}
# Register a latex text and, if it is neither already registered nor in the
# cache, print it into the latex file between two placeholder comments
# which allow to find the latex2html generated text later on.
#
# Arguments:
#   $command   name of the command; 'math' text is put between $...$
#   $text      the latex text
#   $counter   joined to $command to build the key used by do_tex()
# Returns 1, or nothing when $status is false.
sub to_latex($$$)
{
    my ($command, $text, $counter) = @_;
    return unless ($status);

    $text .= ' ' if ($command eq 'tex');
    $text = '$' . $text . '$' if ($command eq 'math');
    $to_latex_count++;
    # trailing white space is not significant
    $text =~ s/\s*$//;

    # the same text may already be among the things to do
    my $count = $l2h_to_latex{$text};
    if (!$count)
    {
        $count = ++$latex_count;
        my $cached_text = from_cache($text);
        if (!defined($cached_text))
        {
            # not in cache either: it has to go through latex2html
            $latex_converted_count++;
            if (!Texi2HTML::Config::get_conf('L2H_SKIP'))
            {
                print L2H_LATEX "\\begin{rawhtml}\n\n"
                    . "<!-- l2h_begin $l2h_name $count -->\n"
                    . "\\end{rawhtml}\n"
                    . "$text\n"
                    . "\\begin{rawhtml}\n"
                    . "<!-- l2h_end $l2h_name $count -->\n\n"
                    . "\\end{rawhtml}\n";
            }
        }
        else
        {
            # put the cached result in the html result array
            $cached_count++;
            $l2h_from_html[$count] = $cached_text;
        }
        $l2h_to_latex{$text} = $count;
        $l2h_to_latex[$count] = $text;
    }
    $global_count{"${command}_$counter"} = $count;
    return 1;
}
# Print the closing into the latex file and close it (unless L2H_SKIP is
# set).  Returns 1 if some latex text was stored; otherwise calls finish()
# and returns 0.
sub finish_to_latex()
{
    if (!Texi2HTML::Config::get_conf('L2H_SKIP'))
    {
        print L2H_LATEX $l2h_latex_closing;
        close (L2H_LATEX);
    }
    if ($verbose)
    {
        # texts that were neither converted nor cached were duplicates
        my $reused = $to_latex_count - $latex_converted_count - $cached_count;
        warn "# l2h: finished to latex ($cached_count cached, $reused reused, $latex_converted_count to process)\n";
    }
    return 1 if ($latex_count);

    # no @tex nor @math
    finish();
    return 0;
}
###################################
# Second stage: Use latex2html to generate corresponding html code and images
#
# to_html():
#   Call latex2html on $l2h_latex_file.
#   The images (prefixed with $l2h_prefix) and html file(s) are put in
#   $docu_rdir, or in the current directory if $docu_rdir is empty.
#   Return 1, on success (or if there is nothing to run)
#          0, otherwise
#
sub to_html()
{
    # when there are no tex constructs to convert (happens in case everything
    # comes from the cache), there is no latex2html run
    if (Texi2HTML::Config::get_conf('L2H_SKIP') or ($latex_converted_count == 0))
    {
        warn "# l2h: skipping latex2html run\n" if ($verbose);
        return 1;
    }

    my $tmp_dir = Texi2HTML::Config::get_conf('L2H_TMP');

    # Give up if there is a dot in the directory where dvips will work.
    if ($tmp_dir)
    {
        if ($tmp_dir =~ /\./)
        {
            main::document_warn ("l2h: l2h_tmp dir contains a dot.");
            return 0;
        }
    }
    elsif (cwd() =~ /\./)
    {
        main::document_warn ("l2h: current dir contains a dot.");
        return 0;
    }

    # The command line is gathered piece by piece and joined with spaces.
    my $l2h_program = Texi2HTML::Config::get_conf('L2H_L2H');
    my @command = ($l2h_program);

    # use init file, if specified
    my $init_file = Texi2HTML::Config::get_conf('L2H_FILE');
    if (defined($init_file) and $init_file ne '' and -f $init_file and -r $init_file)
    {
        push @command, "-init_file $init_file";
    }
    # set output dir
    if ($docu_rdir ne '')
    {
        push @command, "-dir $docu_rdir";
    }
    else
    {
        push @command, "-no_subdir";
    }
    # use l2h_tmp, if specified
    push @command, "-tmp $tmp_dir" if (defined($tmp_dir) and $tmp_dir ne '');
    # use a given html version if specified
    my $html_version = Texi2HTML::Config::get_conf('L2H_HTML_VERSION');
    push @command, "-html_version $html_version"
        if (defined($html_version) and $html_version ne '');
    # options we want to be sure of
    push @command, "-address 0 -info 0 -split 0 -no_navigation -no_auto_link";
    push @command, "-prefix $l2h_prefix $l2h_latex_file";

    my $call = join(' ', @command);
    warn "# l2h: executing '$call'\n" if ($verbose);
    if (system($call) != 0)
    {
        main::document_error ("l2h: '${call}' did not succeed");
        return 0;
    }
    warn "# l2h: latex2html finished successfully\n" if ($verbose);
    return 1;
}
##########################
# Third stage: Extract generated contents from latex2html run
# Initialize with: init_from_html
#   open $l2h_html_file for reading
#   reads in contents into array indexed by numbers
#   return 1,  on success -- 0, otherwise
# Finish with: finish
#   stores the cache and removes the temporary files

# The images generated by latex2html have names like ${docu_name}_l2h_img?.png
# they are moved to ${docu_name}_?.png, and html is changed accordingly.
# FIXME is it really necessary to bother doing that? Looks like an unneeded
# complication to me (pertusus, 2009), and it could go bad if there is some
# SRC="(.*?)" in the text (though the regexp could be made more specific).
#
# Argument: the html text generated for one latex text.
# Returns the html text with the SRC attributes changed to the new names.
# %l2h_img associates src file to destination file such that files are not
# copied twice.
sub change_image_file_names($)
{
    my $content = shift;
    my @images = ($content =~ /SRC="(.*?)"/g);
    my $doc_ext = Texi2HTML::Config::get_conf('EXTENSION');

    foreach my $src (@images)
    {
        my $dest = $l2h_img{$src};
        unless ($dest)
        {
            my $ext;
            if ($src =~ /.*\.(.*)$/ and (!defined($doc_ext) or $1 ne $doc_ext))
            {
                $ext = ".$1";
            }
            else
            {   # A warning when the image extension is the same than the
                # document extension. copying the file could result in
                # overwriting an output file (almost surely if the default
                # texi2html file names are used).
                main::document_warn ("L2h image $src has invalid extension");
                next;
            }
            # find a name that is not used already
            while (-e "$docu_rdir${docu_name}_${image_count}$ext")
            {
                $image_count++;
            }
            $dest = "${docu_name}_${image_count}$ext";

            my $from = "$docu_rdir$src";
            my $to = "$docu_rdir$dest";
            if ($debug)
            {
                # With debug the original file is kept.  Not portable, but
                # only used with debug.  The list form of system does not go
                # through the shell, so the file names need no quoting.
                if (system('cp', '-f', $from, $to) != 0)
                {
                    main::document_warn ("l2h: could not copy $from to $to");
                }
            }
            elsif (!rename ($from, $to))
            {
                main::document_warn ("l2h: could not rename $from to $to: $!");
            }
            $l2h_img{$src} = $dest;
        }
        # \Q...\E: the file name is a literal string, not a pattern
        $content =~ s/SRC="\Q$src\E"/SRC="$dest"/g;
    }
    return $content;
}
# Read the html file generated by latex2html and store the text found
# between each pair of "l2h_begin"/"l2h_end" placeholder comments in the
# html result array and in the cache hash.
#
# Returns 1 on success (also when there was nothing to convert), 0 if the
# html file cannot be opened or if a closing comment is missing.
sub init_from_html()
{
    # when there are no tex constructs to convert (happens in case everything
    # comes from the cache), the html file that was generated by previous
    # latex2html runs isn't reused.
    return 1 if ($latex_converted_count == 0);

    my $html_fh;
    unless (open($html_fh, '<', $l2h_html_file))
    {
        main::document_warn ("l2h: Can't open $l2h_html_file for reading");
        return 0;
    }
    warn "# l2h: use $l2h_html_file as html file\n" if ($verbose);

    my $html_converted_count = 0;  # number of html resulting texts
                                   # retrieved in the file
    while (my $h_line = <$html_fh>)
    {
        # \Q...\E: the document name may hold regexp metacharacters
        next unless ($h_line =~ /!-- l2h_begin \Q$l2h_name\E ([0-9]+) --/);

        my $count = $1;
        my $h_content = '';
        my $h_end_found = 0;
        # gather the lines up to the matching closing comment
        while ($h_line = <$html_fh>)
        {
            if ($h_line =~ /!-- l2h_end \Q$l2h_name\E $count --/)
            {
                $h_end_found = 1;
                # remove up to two end of lines at the end
                chomp $h_content;
                chomp $h_content;
                $html_converted_count++;
                # transform image file names and copy image files
                $h_content = change_image_file_names($h_content);
                # store result in the html result array
                $l2h_from_html[$count] = $h_content;
                # also add the result in cache hash, if the latex text
                # that led to it is known
                $l2h_cache{$l2h_to_latex[$count]} = $h_content
                    if (defined($l2h_to_latex[$count]));
                last;
            }
            $h_content .= $h_line;
        }
        unless ($h_end_found)
        {   # couldn't found the closing comment. Certainly a bug.
            main::msg_debug ("l2h: l2h_end $l2h_name $count not found");
            close($html_fh);
            return 0;
        }
    }

    # Not the same number of converted elements and retrieved elements
    if ($latex_converted_count != $html_converted_count)
    {
        main::msg_debug ("l2h: waiting for $latex_converted_count elements found $html_converted_count");
    }

    warn "# l2h: Got $html_converted_count of $latex_count html contents\n"
        if ($verbose);

    close($html_fh);
    return 1;
}
# Called each time a construct handled by latex2html is encountered, should
# output the corresponding html.
#
# Arguments:
#   $style     command name, as given to to_latex()
#   $counter   counter, as given to to_latex()
#   $state     hash reference; if its 'remove_texi' entry is true the
#              original latex text is returned instead of the html
# (the prototype has a fourth argument which is not used here)
# Returns the text to output, '' for an invalid counter, and nothing when
# $status is false.  $html_output_count counts the html texts output.
sub do_tex($$$$)
{
    my ($style, $counter, $state) = @_;
    return unless ($status);

    my $count = $global_count{"${style}_$counter"};

    ################################## begin debug section (incorrect counts)
    unless (defined($count))
    {
        # counter is undefined
        $invalid_counter_count++;
        main::msg_debug ("l2h: undefined count for ${style}_$counter");
        return $debug ? ("<!-- l2h: ". __LINE__ . " undef count for ${style}_$counter -->") : '';
    }
    if (($count <= 0) or ($count > $latex_count))
    {
        # counter out of range
        $invalid_counter_count++;
        main::msg_debug ("l2h: Request of $count content which is out of valide range [0,$latex_count)");
        return $debug ? ("<!-- l2h: ". __LINE__ . " out of range count $count -->") : '';
    }
    ################################## end debug section (incorrect counts)

    # this seems to be a valid counter
    my $result = $debug ? "<!-- l2h_begin $l2h_name $count -->" : '';
    if (!defined($l2h_from_html[$count]))
    {
        # if the result is not in @l2h_from_html, there is an error somewhere.
        $extract_error_count++;
        main::msg_debug ("l2h: can't extract content $count from html");
        # try simple (ordinary) substitution (without l2h)
        $result .= "<!-- l2h: ". __LINE__ . " use texi2html -->" if ($debug);
        $result .= main::substitute_text({}, undef, 'error in l2h', $l2h_to_latex[$count]);
    }
    else
    {
        $html_output_count++;
        # maybe we could also have something if simple_format
        # with Texi2HTML::Config::protect_text in case there
        # was some @math on a line passed through simple_format.
        # This would certainly be illegal texinfo, however.
        #
        # with remove_texi, don't protect anything
        $result .= $state->{'remove_texi'} ? $l2h_to_latex[$count]
                                           : $l2h_from_html[$count];
    }
    $result .= "<!-- l2h_end $l2h_name $count -->" if ($debug);
    return $result;
}
# Store results in the cache and, if L2H_CLEAN is set, remove the files
# whose name begins with "$docu_rdir$l2h_name".
# Returns 1, or nothing when $status is false.
sub finish()
{
    return unless($status);

    if ($verbose)
    {
        my $summary = 'no error';
        if ($extract_error_count + $invalid_counter_count)
        {
            $summary = "$extract_error_count extract and $invalid_counter_count invalid counter errors";
        }
        warn "# l2h: finished from html ($summary)\n";

        # this may happen if @-commands are collected at some places
        # but @-command at those places are not expanded later. For
        # example @math on @multitable lines.
        warn "# l2h: $html_output_count html outputed for $latex_converted_count converted\n"
            if ($html_output_count != $latex_converted_count);
    }

    store_cache();

    if (Texi2HTML::Config::get_conf('L2H_CLEAN'))
    {
        warn "# l2h: removing temporary files generated by l2h extension\n"
            if $verbose;
        # the quotes belong to the pattern, they protect the file name part
        foreach my $temporary_file (glob(qq{"$docu_rdir$l2h_name"*}))
        {
            # FIXME error condition not checked
            unlink $temporary_file;
        }
    }

    warn "# l2h: Finished\n" if $verbose;
    return 1;
}
# The driver of the end of the first stage and of the second stage.
#
# Does nothing if $status is false.  Otherwise $status is set to the result
# of finish_to_latex() and then, if true, to the result of to_html().
# Returns nothing as soon as $status is false.
sub latex2html()
{
    return unless($status);

    $status = finish_to_latex();
    return unless ($status);

    $status = to_html();
    return unless ($status);

    return $status;
}
##############################
# stuff for l2h caching
#
# FIXME it is clear that l2h stuff takes very long compared with texi2html
# which is already quite long. However this also adds some complexity

# I tried doing this with a dbm data base, but it did not store all
# keys/values. Hence, I did as latex2html does it

# Load the cache file $l2h_cache_file, if it is readable.  The file is perl
# code (see store_cache) which fills %l2h_cache.  An error is reported with
# main::document_error if the file does not load or does not end with a
# true value.
sub init_cache
{
    return unless (-r $l2h_cache_file);

    # do() looks for a relative file name in @INC, and the current
    # directory is not in @INC anymore (perl 5.26), so an absolute file
    # name is used.
    require File::Spec;
    my $cache_path = File::Spec->rel2abs($l2h_cache_file);

    my $rdo = do $cache_path;
    unless ($rdo)
    {
        # $@ is set if the file does not compile, $! if it cannot be read
        my $reason = ($@ ne '') ? $@ : $!;
        main::document_error ("l2h: could not load $l2h_cache_file: $reason");
    }
}
# Store all the text obtained through latex2html in $l2h_cache_file, as
# perl code: for each entry of %l2h_cache, sorted by key, the key is
# assigned to $l2h_cache_key in a q/.../ string and the value to
# $l2h_cache{$l2h_cache_key} in a q|...| string.  The file ends with "1;".
#
# Nothing is written if no latex text was stored ($latex_count is 0).
# Returns nothing if the file cannot be opened, the result of close
# otherwise.
sub store_cache
{
    return unless $latex_count;

    my $cache_fh;
    unless (open($cache_fh, '>', $l2h_cache_file))
    {
        main::document_error ("l2h: could not open $l2h_cache_file for writing: $!");
        return;
    }
    foreach my $latex_text (sort(keys(%l2h_cache)))
    {
        my $key = $latex_text;
        my $value = $l2h_cache{$latex_text};
        # In a q string only the backslash and the delimiter are special.
        # Escaping both of them, and nothing else, makes sure that the
        # strings read back are the same as the strings written, even with
        # a backslash before a delimiter or at the end.
        $key =~ s{([\\/])}{\\$1}g;
        $value =~ s{([\\|])}{\\$1}g;
        print $cache_fh "\n\$l2h_cache_key = q/$key/;\n";
        print $cache_fh "\$l2h_cache{\$l2h_cache_key} = q|$value|;\n";
    }
    print $cache_fh "1;";

    # write errors may only show up when the file is closed
    my $closed = close ($cache_fh);
    main::document_error ("l2h: error while writing $l2h_cache_file: $!")
        unless ($closed);
    return $closed;
}
# Return the cached html for the latex text given in argument, if there is
# one and if all the files it refers to in SRC="..." exist in $docu_rdir.
# Return undef otherwise.
sub from_cache($)
{
    my ($latex_text) = @_;

    my $html = $l2h_cache{$latex_text};
    return undef unless (defined($html));

    # the cached html is of no use if one of the pictures is missing
    foreach my $image ($html =~ m/SRC="(.*?)"/g)
    {
        return undef if (! -e "$docu_rdir$image");
    }
    return $html;
}
- 1;
|