123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453 |
- #! /usr/bin/env perl
- #
- # texixml2texi -- convert Texinfo XML to Texinfo code
- #
- # Copyright 2012 Free Software Foundation, Inc.
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 3 of the License,
- # or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- # Original author: Patrice Dumas <pertusus@free.fr>
- use strict;
- use Getopt::Long qw(GetOptions);
- # for dirname.
- use File::Basename;
- use File::Spec;
- Getopt::Long::Configure("gnu_getopt");
- BEGIN
- {
- # emulate -w
- $^W = 1;
- my ($real_command_name, $command_directory, $command_suffix)
- = fileparse($0, '.pl');
- my $datadir = '@datadir@';
- my $package = '@PACKAGE@';
- my $updir = File::Spec->updir();
- my $texinfolibdir;
- my $lib_dir;
- # in-source run
- if (($command_suffix eq '.pl' and !(defined($ENV{'TEXINFO_DEV_SOURCE'})
- and $ENV{'TEXINFO_DEV_SOURCE'} eq 0)) or $ENV{'TEXINFO_DEV_SOURCE'}) {
- my $srcdir = defined $ENV{'srcdir'} ? $ENV{'srcdir'} : $command_directory;
- $texinfolibdir = File::Spec->catdir($srcdir, $updir, 'tp');
- $lib_dir = File::Spec->catdir($texinfolibdir, 'maintain');
- unshift @INC, $texinfolibdir;
- } elsif ($datadir ne '@' .'datadir@' and $package ne '@' . 'PACKAGE@'
- and $datadir ne '') {
- $texinfolibdir = File::Spec->catdir($datadir, $package);
- # try to make package relocatable, will only work if standard relative paths
- # are used
- if (! -f File::Spec->catfile($texinfolibdir, 'Texinfo', 'Parser.pm')
- and -f File::Spec->catfile($command_directory, $updir, 'share',
- 'texinfo', 'Texinfo', 'Parser.pm')) {
- $texinfolibdir = File::Spec->catdir($command_directory, $updir,
- 'share', 'texinfo');
- }
- $lib_dir = $texinfolibdir;
- unshift @INC, $texinfolibdir;
- }
- # '@USE_EXTERNAL_LIBINTL @ and similar are substituted in the
- # makefile using values from configure
- if (defined($texinfolibdir)) {
- if ('@USE_EXTERNAL_LIBINTL@' ne 'yes') {
- unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'libintl-perl', 'lib'));
- }
- if ('@USE_EXTERNAL_EASTASIANWIDTH@' ne 'yes') {
- unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Unicode-EastAsianWidth', 'lib'));
- }
- if ('@USE_EXTERNAL_UNIDECODE@' ne 'yes') {
- unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Text-Unidecode', 'lib'));
- }
- }
- }
- use XML::LibXML::Reader;
- # gather information on Texinfo XML elements
- use Texinfo::Common;
- use Texinfo::Convert::TexinfoXML;
- my $debug = 0;
- my $result_options = Getopt::Long::GetOptions (
- 'debug|d' => \$debug,
- );
- sub command_with_braces($)
- {
- my $command = shift;
- if ($command =~ /^[a-z]/i) {
- return "\@".$command.'{}';
- } else {
- return "\@".$command;
- }
- }
- my %ignored_elements = (
- 'prepend' => 1,
- 'formalarg' => 1,
- # not ignored everytime
- 'indexterm' => 1,
- );
- my %elements_end_attributes = (
- 'accent' => 1,
- 'menunode' => 1,
- 'menutitle' => 1,
- );
- my %element_at_commands;
- my %entity_texts = (
- 'textldquo' => '``',
- 'textrdquo' => "''",
- 'textmdash' => '---',
- 'textndash' => '--',
- 'textrsquo' => "'",
- 'textlsquo' => '`',
- 'formfeed' => "\f",
- # this is not used in pratice, as attrformfeed appears in an
- # attribute and thus is already expanded to text.
- 'attrformfeed' => "\f",
- );
- foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_formatting)) {
- if (!ref($Texinfo::Convert::TexinfoXML::commands_formatting{$command})) {
- $entity_texts{$Texinfo::Convert::TexinfoXML::commands_formatting{$command}}
- = command_with_braces($command);
- } else {
- my $spec = $Texinfo::Convert::TexinfoXML::commands_formatting{$command};
- my $element = $spec->[0];
- if ($element eq 'spacecmd') {
- if ($spec->[1] eq 'type') {
- $element_at_commands{$element}->{"type"}->{$spec->[2]}
- = command_with_braces($command);
- } else {
- die "BUG, bad spacecmd specification";
- }
- } else {
- $element_at_commands{$element} = command_with_braces($command);
- }
- }
- }
- $element_at_commands{'accent'} = 0;
- my %arg_elements;
- foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_args_elements)) {
- my $arg_index = 0;
- foreach my $element_argument (@{$Texinfo::Convert::TexinfoXML::commands_args_elements{$command}}) {
- $arg_elements{$element_argument} = [$arg_index, $command];
- $arg_index++;
- }
- }
- my %accent_type_command;
- foreach my $accent_command (keys(%Texinfo::Convert::TexinfoXML::accent_types)) {
- $accent_type_command{$Texinfo::Convert::TexinfoXML::accent_types{$accent_command}}
- = $accent_command;
- }
- my %eat_space_elements;
- foreach my $element ('texinfo', 'filename') {
- $eat_space_elements{$element} = 1;
- }
- my $infile = shift @ARGV;
- if (!defined($infile) or $infile !~ /\S/) {
- die "Missing file\n";
- }
- my $reader = XML::LibXML::Reader->new('location' => $infile,
- 'expand_entities' => 0,
- )
- or die "cannot read $infile\n";
- #(my $mydir = $0) =~ s,/[^/]*$,,; # dir we are in
- #my $txi_dtd_libdir = "$mydir"; # find tp relative to $0
- sub skip_until_end($$)
- {
- my $reader = shift;
- my $name = shift;
- while ($reader->read) {
- if ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT
- and $reader->name eq $name) {
- return;
- }
- }
- }
- my $eat_space = 0;
- my @commands_with_args_stack;
- while ($reader->read) {
- # ============================================================ begin debug
- if ($debug) {
- printf STDERR "(args: @commands_with_args_stack) (eat_space $eat_space) %d %d %s %d", ($reader->depth,
- $reader->nodeType,
- $reader->name,
- $reader->isEmptyElement);
- my $value = '';
- if ($reader->hasValue()) {
- $value = $reader->value();
- $value =~ s/\n/\\n/g;
- print STDERR " |$value|";
- }
- if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT
- and $reader->hasAttributes()
- and defined($reader->getAttribute('spaces'))) {
- my $spaces = $reader->getAttribute('spaces');
- print STDERR " spaces:$spaces|";
- }
- print STDERR "\n";
- }
- # ============================================================ end debug
- if ($reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
- and $eat_space) {
- $eat_space = 0;
- next;
- } elsif ($reader->nodeType() eq XML_READER_TYPE_TEXT
- or $reader->nodeType() eq XML_READER_TYPE_WHITESPACE
- or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
- ) {
- if ($reader->hasValue()) {
- print $reader->value();
- }
- }
- my $name = $reader->name;
- if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT) {
- if (($name eq 'entry' or $name eq 'indexcommand')
- and $reader->hasAttributes()
- and defined($reader->getAttribute('command'))) {
- $name = $reader->getAttribute('command');
- } elsif ($name eq 'listitem') {
- $name = 'item';
- }
- if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
- push @commands_with_args_stack, 0;
- }
- if (exists $element_at_commands{$name}) {
- if ($name eq 'accent') {
- if ($reader->hasAttributes()) {
- if (defined($reader->getAttribute('type'))) {
- my $command = $accent_type_command{$reader->getAttribute('type')};
- print "\@$command"
- if (defined($command));
- }
- if (!defined($reader->getAttribute('spaces'))
- and !(defined($reader->getAttribute('bracketed'))
- and $reader->getAttribute('bracketed') eq 'off')) {
- print '{';
- }
- } else {
- print '{';
- }
- } elsif (!ref($element_at_commands{$name})) {
- print $element_at_commands{$name};
- } else {
- my ($attribute) = keys(%{$element_at_commands{$name}});
- if ($reader->hasAttributes()
- and defined($reader->getAttribute($attribute))) {
- print
- $element_at_commands{$name}->{$attribute}->{$reader->getAttribute($attribute)};
- }
- }
- } elsif (exists($Texinfo::Common::brace_commands{$name})) {
- print "\@${name}{";
- if ($name eq 'verb' and $reader->hasAttributes()
- and defined($reader->getAttribute('delimiter'))) {
- print $reader->getAttribute('delimiter');
- }
- } elsif (exists($Texinfo::Common::block_commands{$name})) {
- print "\@$name";
- if ($name eq 'macro') {
- if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
- print $reader->getAttribute('line');
- }
- print "\n";
- }
- } elsif (defined($Texinfo::Common::misc_commands{$name})) {
- if ($reader->hasAttributes()
- and defined($reader->getAttribute('originalcommand'))) {
- $name = $reader->getAttribute('originalcommand');
- }
- if ($name eq 'documentencoding' and $reader->hasAttributes()
- and defined($reader->getAttribute('encoding'))) {
- my ($texinfo_encoding, $perl_encoding, $output_encoding)
- = Texinfo::Encoding::encoding_alias($reader->getAttribute('encoding'));
- if (defined($perl_encoding)) {
- if ($debug) {
- print STDERR "Using encoding $perl_encoding\n";
- }
- binmode(STDOUT, ":encoding($perl_encoding)");
- }
- }
- print "\@$name";
- if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
- my $line = $reader->getAttribute('line');
- $line =~ s/\\\\/\x{1F}/g;
- $line =~ s/\\f/\f/g;
- $line =~ s/\x{1F}/\\/g;
- print $line;
- }
- if ($name eq 'set' or $name eq 'clickstyle') {
- skip_until_end($reader, $name);
- }
- } elsif ($arg_elements{$name}) {
- if ($reader->hasAttributes()
- and defined($reader->getAttribute('automatic'))
- and $reader->getAttribute('automatic') eq 'on') {
- skip_until_end($reader, $name);
- next;
- }
- while ($arg_elements{$name}->[0]
- and $commands_with_args_stack[-1] < $arg_elements{$name}->[0]) {
- $commands_with_args_stack[-1]++;
- print ',';
- }
- } elsif ($ignored_elements{$name}) {
- my $keep_indexterm = 0;
- if ($name eq 'indexterm') {
- my $node_path = $reader->nodePath();
- if ($node_path =~ m:([a-z]+)/indexterm$:) {
- my $parent = $1;
- if ($parent =~ /^[a-z]?[a-z]index$/ or $parent eq 'indexcommand') {
- $keep_indexterm = 1;
- }
- }
- }
- if (!$keep_indexterm) {
- skip_until_end($reader, $name);
- next;
- }
- } elsif ($name eq 'formattingcommand') {
- if ($reader->hasAttributes()
- and defined($reader->getAttribute('command'))) {
- print '@'.$reader->getAttribute('command');
- }
- # def* automatic
- } elsif ($reader->hasAttributes()
- and defined($reader->getAttribute('automatic'))
- and $reader->getAttribute('automatic') eq 'on') {
- skip_until_end($reader, $name);
- # eat the following space
- $reader->read();
- } elsif ($eat_space_elements{$name}) {
- $eat_space = 1;
- } else {
- print STDERR "UNKNOWN $name\n" if ($debug);
- }
- if ($reader->hasAttributes()) {
- if (defined($reader->getAttribute('bracketed'))
- and $reader->getAttribute('bracketed') eq 'on') {
- print '{';
- }
- if (defined($reader->getAttribute('spaces'))) {
- my $spaces = $reader->getAttribute('spaces');
- $spaces =~ s/\\n/\n/g;
- $spaces =~ s/\\f/\f/g;
- print $spaces;
- }
- if (defined($reader->getAttribute('leadingtext'))) {
- print $reader->getAttribute('leadingtext');
- }
- }
- if ($Texinfo::Common::item_line_commands{$name}
- and $reader->hasAttributes()
- and defined($reader->getAttribute('commandarg'))) {
- print '@'.$reader->getAttribute('commandarg');
- }
- } elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) {
- if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
- pop @commands_with_args_stack;
- }
- if ($reader->hasAttributes()) {
- if (defined($reader->getAttribute('bracketed'))
- and $reader->getAttribute('bracketed') eq 'on') {
- print '}';
- }
- }
- if (exists ($Texinfo::Common::brace_commands{$name})) {
- if ($name eq 'verb' and $reader->hasAttributes()
- and defined($reader->getAttribute('delimiter'))) {
- print $reader->getAttribute('delimiter');
- }
- print '}';
- } elsif (exists($Texinfo::Common::block_commands{$name})) {
- my $end_spaces;
- if ($reader->hasAttributes()
- and defined($reader->getAttribute('endspaces'))) {
- $end_spaces = $reader->getAttribute('endspaces');
- }
- $end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq '');
- print "\@end".$end_spaces."$name";
- } elsif (defined($Texinfo::Common::misc_commands{$name})) {
- if ($Texinfo::Common::root_commands{$name} and $name ne 'node') {
- $eat_space = 1;
- }
- } elsif ($elements_end_attributes{$name}) {
- if ($name eq 'accent') {
- if ($reader->hasAttributes()) {
- if (!defined($reader->getAttribute('spaces'))
- and !(defined($reader->getAttribute('bracketed'))
- and $reader->getAttribute('bracketed') eq 'off')) {
- print '}';
- }
- } else {
- print '}';
- }
- } elsif ($reader->hasAttributes()
- and defined($reader->getAttribute('separator'))) {
- print $reader->getAttribute('separator');
- }
- } elsif ($eat_space_elements{$name}) {
- $eat_space = 1;
- } else {
- print STDERR "END UNKNOWN $name\n" if ($debug);
- }
- if ($reader->hasAttributes()
- and defined($reader->getAttribute('trailingspaces'))) {
- my $trailingspaces = $reader->getAttribute('trailingspaces');
- $trailingspaces =~ s/\\f/\f/g;
- print $trailingspaces;
- }
- } elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) {
- if (defined($entity_texts{$name})) {
- print $entity_texts{$name};
- }
- } elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) {
- my $comment;
- if ($reader->hasValue()) {
- $comment = $reader->value();
- $comment =~ s/^ (comment|c)//;
- my $command = $1;
- $comment =~ s/ $//;
- print "\@${command}$comment";
- }
- } elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) {
- $eat_space = 1;
- }
- }
- 1;
|