txixml2texi.pl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. #! /usr/bin/env perl
  2. #
  3. # texixml2texi -- convert Texinfo XML to Texinfo code
  4. #
  5. # Copyright 2012 Free Software Foundation, Inc.
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation; either version 3 of the License,
  10. # or (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. #
  20. # Original author: Patrice Dumas <pertusus@free.fr>
  21. use strict;
  22. use Getopt::Long qw(GetOptions);
  23. # for dirname.
  24. use File::Basename;
  25. use File::Spec;
  26. Getopt::Long::Configure("gnu_getopt");
  27. BEGIN
  28. {
  29. # emulate -w
  30. $^W = 1;
  31. my ($real_command_name, $command_directory, $command_suffix)
  32. = fileparse($0, '.pl');
  33. my $datadir = '@datadir@';
  34. my $package = '@PACKAGE@';
  35. my $updir = File::Spec->updir();
  36. my $texinfolibdir;
  37. my $lib_dir;
  38. # in-source run
  39. if (($command_suffix eq '.pl' and !(defined($ENV{'TEXINFO_DEV_SOURCE'})
  40. and $ENV{'TEXINFO_DEV_SOURCE'} eq 0)) or $ENV{'TEXINFO_DEV_SOURCE'}) {
  41. my $srcdir = defined $ENV{'srcdir'} ? $ENV{'srcdir'} : $command_directory;
  42. $texinfolibdir = File::Spec->catdir($srcdir, $updir, 'tp');
  43. $lib_dir = File::Spec->catdir($texinfolibdir, 'maintain');
  44. unshift @INC, $texinfolibdir;
  45. } elsif ($datadir ne '@' .'datadir@' and $package ne '@' . 'PACKAGE@'
  46. and $datadir ne '') {
  47. $texinfolibdir = File::Spec->catdir($datadir, $package);
  48. # try to make package relocatable, will only work if standard relative paths
  49. # are used
  50. if (! -f File::Spec->catfile($texinfolibdir, 'Texinfo', 'Parser.pm')
  51. and -f File::Spec->catfile($command_directory, $updir, 'share',
  52. 'texinfo', 'Texinfo', 'Parser.pm')) {
  53. $texinfolibdir = File::Spec->catdir($command_directory, $updir,
  54. 'share', 'texinfo');
  55. }
  56. $lib_dir = $texinfolibdir;
  57. unshift @INC, $texinfolibdir;
  58. }
  59. # '@USE_EXTERNAL_LIBINTL @ and similar are substituted in the
  60. # makefile using values from configure
  61. if (defined($texinfolibdir)) {
  62. if ('@USE_EXTERNAL_LIBINTL@' ne 'yes') {
  63. unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'libintl-perl', 'lib'));
  64. }
  65. if ('@USE_EXTERNAL_EASTASIANWIDTH@' ne 'yes') {
  66. unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Unicode-EastAsianWidth', 'lib'));
  67. }
  68. if ('@USE_EXTERNAL_UNIDECODE@' ne 'yes') {
  69. unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Text-Unidecode', 'lib'));
  70. }
  71. }
  72. }
  73. use XML::LibXML::Reader;
  74. # gather information on Texinfo XML elements
  75. use Texinfo::Common;
  76. use Texinfo::Convert::TexinfoXML;
  77. my $debug = 0;
  78. my $result_options = Getopt::Long::GetOptions (
  79. 'debug|d' => \$debug,
  80. );
  81. sub command_with_braces($)
  82. {
  83. my $command = shift;
  84. if ($command =~ /^[a-z]/i) {
  85. return "\@".$command.'{}';
  86. } else {
  87. return "\@".$command;
  88. }
  89. }
  90. my %ignored_elements = (
  91. 'prepend' => 1,
  92. 'formalarg' => 1,
  93. # not ignored everytime
  94. 'indexterm' => 1,
  95. );
  96. my %elements_end_attributes = (
  97. 'accent' => 1,
  98. 'menunode' => 1,
  99. 'menutitle' => 1,
  100. );
  101. my %element_at_commands;
  102. my %entity_texts = (
  103. 'textldquo' => '``',
  104. 'textrdquo' => "''",
  105. 'textmdash' => '---',
  106. 'textndash' => '--',
  107. 'textrsquo' => "'",
  108. 'textlsquo' => '`',
  109. 'formfeed' => "\f",
  110. # this is not used in pratice, as attrformfeed appears in an
  111. # attribute and thus is already expanded to text.
  112. 'attrformfeed' => "\f",
  113. );
  114. foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_formatting)) {
  115. if (!ref($Texinfo::Convert::TexinfoXML::commands_formatting{$command})) {
  116. $entity_texts{$Texinfo::Convert::TexinfoXML::commands_formatting{$command}}
  117. = command_with_braces($command);
  118. } else {
  119. my $spec = $Texinfo::Convert::TexinfoXML::commands_formatting{$command};
  120. my $element = $spec->[0];
  121. if ($element eq 'spacecmd') {
  122. if ($spec->[1] eq 'type') {
  123. $element_at_commands{$element}->{"type"}->{$spec->[2]}
  124. = command_with_braces($command);
  125. } else {
  126. die "BUG, bad spacecmd specification";
  127. }
  128. } else {
  129. $element_at_commands{$element} = command_with_braces($command);
  130. }
  131. }
  132. }
  133. $element_at_commands{'accent'} = 0;
  134. my %arg_elements;
  135. foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_args_elements)) {
  136. my $arg_index = 0;
  137. foreach my $element_argument (@{$Texinfo::Convert::TexinfoXML::commands_args_elements{$command}}) {
  138. $arg_elements{$element_argument} = [$arg_index, $command];
  139. $arg_index++;
  140. }
  141. }
  142. my %accent_type_command;
  143. foreach my $accent_command (keys(%Texinfo::Convert::TexinfoXML::accent_types)) {
  144. $accent_type_command{$Texinfo::Convert::TexinfoXML::accent_types{$accent_command}}
  145. = $accent_command;
  146. }
  147. my %eat_space_elements;
  148. foreach my $element ('texinfo', 'filename') {
  149. $eat_space_elements{$element} = 1;
  150. }
  151. my $infile = shift @ARGV;
  152. if (!defined($infile) or $infile !~ /\S/) {
  153. die "Missing file\n";
  154. }
  155. my $reader = XML::LibXML::Reader->new('location' => $infile,
  156. 'expand_entities' => 0,
  157. )
  158. or die "cannot read $infile\n";
  159. #(my $mydir = $0) =~ s,/[^/]*$,,; # dir we are in
  160. #my $txi_dtd_libdir = "$mydir"; # find tp relative to $0
  161. sub skip_until_end($$)
  162. {
  163. my $reader = shift;
  164. my $name = shift;
  165. while ($reader->read) {
  166. if ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT
  167. and $reader->name eq $name) {
  168. return;
  169. }
  170. }
  171. }
  172. my $eat_space = 0;
  173. my @commands_with_args_stack;
  174. while ($reader->read) {
  175. # ============================================================ begin debug
  176. if ($debug) {
  177. printf STDERR "(args: @commands_with_args_stack) (eat_space $eat_space) %d %d %s %d", ($reader->depth,
  178. $reader->nodeType,
  179. $reader->name,
  180. $reader->isEmptyElement);
  181. my $value = '';
  182. if ($reader->hasValue()) {
  183. $value = $reader->value();
  184. $value =~ s/\n/\\n/g;
  185. print STDERR " |$value|";
  186. }
  187. if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT
  188. and $reader->hasAttributes()
  189. and defined($reader->getAttribute('spaces'))) {
  190. my $spaces = $reader->getAttribute('spaces');
  191. print STDERR " spaces:$spaces|";
  192. }
  193. print STDERR "\n";
  194. }
  195. # ============================================================ end debug
  196. if ($reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
  197. and $eat_space) {
  198. $eat_space = 0;
  199. next;
  200. } elsif ($reader->nodeType() eq XML_READER_TYPE_TEXT
  201. or $reader->nodeType() eq XML_READER_TYPE_WHITESPACE
  202. or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
  203. ) {
  204. if ($reader->hasValue()) {
  205. print $reader->value();
  206. }
  207. }
  208. my $name = $reader->name;
  209. if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT) {
  210. if (($name eq 'entry' or $name eq 'indexcommand')
  211. and $reader->hasAttributes()
  212. and defined($reader->getAttribute('command'))) {
  213. $name = $reader->getAttribute('command');
  214. } elsif ($name eq 'listitem') {
  215. $name = 'item';
  216. }
  217. if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
  218. push @commands_with_args_stack, 0;
  219. }
  220. if (exists $element_at_commands{$name}) {
  221. if ($name eq 'accent') {
  222. if ($reader->hasAttributes()) {
  223. if (defined($reader->getAttribute('type'))) {
  224. my $command = $accent_type_command{$reader->getAttribute('type')};
  225. print "\@$command"
  226. if (defined($command));
  227. }
  228. if (!defined($reader->getAttribute('spaces'))
  229. and !(defined($reader->getAttribute('bracketed'))
  230. and $reader->getAttribute('bracketed') eq 'off')) {
  231. print '{';
  232. }
  233. } else {
  234. print '{';
  235. }
  236. } elsif (!ref($element_at_commands{$name})) {
  237. print $element_at_commands{$name};
  238. } else {
  239. my ($attribute) = keys(%{$element_at_commands{$name}});
  240. if ($reader->hasAttributes()
  241. and defined($reader->getAttribute($attribute))) {
  242. print
  243. $element_at_commands{$name}->{$attribute}->{$reader->getAttribute($attribute)};
  244. }
  245. }
  246. } elsif (exists($Texinfo::Common::brace_commands{$name})) {
  247. print "\@${name}{";
  248. if ($name eq 'verb' and $reader->hasAttributes()
  249. and defined($reader->getAttribute('delimiter'))) {
  250. print $reader->getAttribute('delimiter');
  251. }
  252. } elsif (exists($Texinfo::Common::block_commands{$name})) {
  253. print "\@$name";
  254. if ($name eq 'macro') {
  255. if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
  256. print $reader->getAttribute('line');
  257. }
  258. print "\n";
  259. }
  260. } elsif (defined($Texinfo::Common::misc_commands{$name})) {
  261. if ($reader->hasAttributes()
  262. and defined($reader->getAttribute('originalcommand'))) {
  263. $name = $reader->getAttribute('originalcommand');
  264. }
  265. if ($name eq 'documentencoding' and $reader->hasAttributes()
  266. and defined($reader->getAttribute('encoding'))) {
  267. my ($texinfo_encoding, $perl_encoding, $output_encoding)
  268. = Texinfo::Encoding::encoding_alias($reader->getAttribute('encoding'));
  269. if (defined($perl_encoding)) {
  270. if ($debug) {
  271. print STDERR "Using encoding $perl_encoding\n";
  272. }
  273. binmode(STDOUT, ":encoding($perl_encoding)");
  274. }
  275. }
  276. print "\@$name";
  277. if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
  278. my $line = $reader->getAttribute('line');
  279. $line =~ s/\\\\/\x{1F}/g;
  280. $line =~ s/\\f/\f/g;
  281. $line =~ s/\x{1F}/\\/g;
  282. print $line;
  283. }
  284. if ($name eq 'set' or $name eq 'clickstyle') {
  285. skip_until_end($reader, $name);
  286. }
  287. } elsif ($arg_elements{$name}) {
  288. if ($reader->hasAttributes()
  289. and defined($reader->getAttribute('automatic'))
  290. and $reader->getAttribute('automatic') eq 'on') {
  291. skip_until_end($reader, $name);
  292. next;
  293. }
  294. while ($arg_elements{$name}->[0]
  295. and $commands_with_args_stack[-1] < $arg_elements{$name}->[0]) {
  296. $commands_with_args_stack[-1]++;
  297. print ',';
  298. }
  299. } elsif ($ignored_elements{$name}) {
  300. my $keep_indexterm = 0;
  301. if ($name eq 'indexterm') {
  302. my $node_path = $reader->nodePath();
  303. if ($node_path =~ m:([a-z]+)/indexterm$:) {
  304. my $parent = $1;
  305. if ($parent =~ /^[a-z]?[a-z]index$/ or $parent eq 'indexcommand') {
  306. $keep_indexterm = 1;
  307. }
  308. }
  309. }
  310. if (!$keep_indexterm) {
  311. skip_until_end($reader, $name);
  312. next;
  313. }
  314. } elsif ($name eq 'formattingcommand') {
  315. if ($reader->hasAttributes()
  316. and defined($reader->getAttribute('command'))) {
  317. print '@'.$reader->getAttribute('command');
  318. }
  319. # def* automatic
  320. } elsif ($reader->hasAttributes()
  321. and defined($reader->getAttribute('automatic'))
  322. and $reader->getAttribute('automatic') eq 'on') {
  323. skip_until_end($reader, $name);
  324. # eat the following space
  325. $reader->read();
  326. } elsif ($eat_space_elements{$name}) {
  327. $eat_space = 1;
  328. } else {
  329. print STDERR "UNKNOWN $name\n" if ($debug);
  330. }
  331. if ($reader->hasAttributes()) {
  332. if (defined($reader->getAttribute('bracketed'))
  333. and $reader->getAttribute('bracketed') eq 'on') {
  334. print '{';
  335. }
  336. if (defined($reader->getAttribute('spaces'))) {
  337. my $spaces = $reader->getAttribute('spaces');
  338. $spaces =~ s/\\n/\n/g;
  339. $spaces =~ s/\\f/\f/g;
  340. print $spaces;
  341. }
  342. if (defined($reader->getAttribute('leadingtext'))) {
  343. print $reader->getAttribute('leadingtext');
  344. }
  345. }
  346. if ($Texinfo::Common::item_line_commands{$name}
  347. and $reader->hasAttributes()
  348. and defined($reader->getAttribute('commandarg'))) {
  349. print '@'.$reader->getAttribute('commandarg');
  350. }
  351. } elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) {
  352. if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
  353. pop @commands_with_args_stack;
  354. }
  355. if ($reader->hasAttributes()) {
  356. if (defined($reader->getAttribute('bracketed'))
  357. and $reader->getAttribute('bracketed') eq 'on') {
  358. print '}';
  359. }
  360. }
  361. if (exists ($Texinfo::Common::brace_commands{$name})) {
  362. if ($name eq 'verb' and $reader->hasAttributes()
  363. and defined($reader->getAttribute('delimiter'))) {
  364. print $reader->getAttribute('delimiter');
  365. }
  366. print '}';
  367. } elsif (exists($Texinfo::Common::block_commands{$name})) {
  368. my $end_spaces;
  369. if ($reader->hasAttributes()
  370. and defined($reader->getAttribute('endspaces'))) {
  371. $end_spaces = $reader->getAttribute('endspaces');
  372. }
  373. $end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq '');
  374. print "\@end".$end_spaces."$name";
  375. } elsif (defined($Texinfo::Common::misc_commands{$name})) {
  376. if ($Texinfo::Common::root_commands{$name} and $name ne 'node') {
  377. $eat_space = 1;
  378. }
  379. } elsif ($elements_end_attributes{$name}) {
  380. if ($name eq 'accent') {
  381. if ($reader->hasAttributes()) {
  382. if (!defined($reader->getAttribute('spaces'))
  383. and !(defined($reader->getAttribute('bracketed'))
  384. and $reader->getAttribute('bracketed') eq 'off')) {
  385. print '}';
  386. }
  387. } else {
  388. print '}';
  389. }
  390. } elsif ($reader->hasAttributes()
  391. and defined($reader->getAttribute('separator'))) {
  392. print $reader->getAttribute('separator');
  393. }
  394. } elsif ($eat_space_elements{$name}) {
  395. $eat_space = 1;
  396. } else {
  397. print STDERR "END UNKNOWN $name\n" if ($debug);
  398. }
  399. if ($reader->hasAttributes()
  400. and defined($reader->getAttribute('trailingspaces'))) {
  401. my $trailingspaces = $reader->getAttribute('trailingspaces');
  402. $trailingspaces =~ s/\\f/\f/g;
  403. print $trailingspaces;
  404. }
  405. } elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) {
  406. if (defined($entity_texts{$name})) {
  407. print $entity_texts{$name};
  408. }
  409. } elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) {
  410. my $comment;
  411. if ($reader->hasValue()) {
  412. $comment = $reader->value();
  413. $comment =~ s/^ (comment|c)//;
  414. my $command = $1;
  415. $comment =~ s/ $//;
  416. print "\@${command}$comment";
  417. }
  418. } elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) {
  419. $eat_space = 1;
  420. }
  421. }
  422. 1;